Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 61
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 98
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 459
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 126
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 59
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 84
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 98
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 1230
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 348
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 284
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 39
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 1325
-rw-r--r--  contrib/llvm/lib/CodeGen/CoreCLRGC.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 87
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 138
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 189
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 201
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 76
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 405
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 117
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 129
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 451
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 102
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 1595
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 432
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 767
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 505
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 360
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 202
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 88
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 89
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 86
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 232
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 326
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp | 96
-rw-r--r--  contrib/llvm/lib/CodeGen/Passes.cpp | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1017
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 149
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp | 140
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 53
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 256
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 373
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 313
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2644
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 142
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 196
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1283
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 272
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 304
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 124
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 275
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 530
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1048
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 133
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 137
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 267
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 175
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 115
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 310
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 148
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 185
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 302
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 166
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 3506
158 files changed, 17505 insertions(+), 10454 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5fe4c4b..4060db7 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -142,16 +142,15 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(!State);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+ bool IsReturnBlock = BB->isReturnBlock();
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -365,9 +364,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
- // defined in a call must not be changed (ABI).
+ // defined in a call must not be changed (ABI). Inline assembly may
+ // reference either system calls or the register directly. Skip it until we
+ // can tell user specified registers from compiler-specified.
if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
- TII->isPredicated(MI)) {
+ TII->isPredicated(MI) || MI->isInlineAsm()) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -429,6 +430,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// If MI's uses have special allocation requirement, don't allow
// any use registers to be changed. Also assume all registers
// used in a call must not be changed (ABI).
+ // Inline Assembly register uses also cannot be safely changed.
// FIXME: The issue with predicated instruction is more complex. We are being
// conservatively here because the kill markers cannot be trusted after
// if-conversion:
@@ -444,7 +446,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// changed.
bool Special = MI->isCall() ||
MI->hasExtraSrcRegAllocReq() ||
- TII->isPredicated(MI);
+ TII->isPredicated(MI) || MI->isInlineAsm();
// Scan the register uses for this instruction and update
// live-ranges, groups and RegRefs.
@@ -509,15 +511,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
// Check all references that need rewriting for Reg. For each, use
// the corresponding register class to narrow the set of registers
// that are appropriate for renaming.
- std::pair<std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator,
- std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator>
- Range = State->GetRegRefs().equal_range(Reg);
- for (std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
- QE = Range.second; Q != QE; ++Q) {
- const TargetRegisterClass *RC = Q->second.RC;
+ for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) {
+ const TargetRegisterClass *RC = Q.second.RC;
if (!RC) continue;
BitVector RCBV = TRI->getAllocatableSet(MF, RC);
@@ -685,9 +680,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
// defines 'NewReg' via an early-clobber operand.
- auto Range = RegRefs.equal_range(Reg);
- for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) {
- auto UseMI = Q->second.Operand->getParent();
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ MachineInstr *UseMI = Q.second.Operand->getParent();
int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
if (Idx == -1)
continue;
@@ -698,6 +692,20 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
}
+ // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining
+ // 'Reg' is an early-clobber define and that instruction also uses
+ // 'NewReg'.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber())
+ continue;
+
+ MachineInstr *DefMI = Q.second.Operand->getParent();
+ if (DefMI->readsRegister(NewReg, TRI)) {
+ DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
// Record that 'Reg' can be renamed to 'NewReg'.
RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
}
@@ -920,23 +928,16 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Update the references to the old register CurrReg to
// refer to the new register NewReg.
- std::pair<std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator,
- std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator>
- Range = RegRefs.equal_range(CurrReg);
- for (std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q) {
- Q->second.Operand->setReg(NewReg);
+ for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) {
+ Q.second.Operand->setReg(NewReg);
// If the SU for the instruction being updated has debug
// information related to the anti-dependency register, make
// sure to update that as well.
- const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
if (!SU) continue;
for (DbgValueVector::iterator DVI = DbgValues.begin(),
DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q->second.Operand->getParent())
+ if (DVI->second == Q.second.Operand->getParent())
UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index dc9bcff..40451c0 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -29,12 +29,13 @@ using namespace llvm;
// Compare VirtRegMap::getRegAllocPref().
AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo)
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix)
: Pos(0) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
- TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
rewind();
DEBUG({
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
index 02b2d92..2aee3a6 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -24,6 +24,7 @@ namespace llvm {
class RegisterClassInfo;
class VirtRegMap;
+class LiveRegMatrix;
class LLVM_LIBRARY_VISIBILITY AllocationOrder {
SmallVector<MCPhysReg, 16> Hints;
@@ -37,7 +38,8 @@ public:
/// @param RegClassInfo Information about reserved and allocatable registers.
AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo);
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
/// Get the allocation order without reordered hints.
ArrayRef<MCPhysReg> getOrder() const { return Order; }
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 98d4c8a..75579a2 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -25,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -515,7 +517,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !isSafeToSpeculativelyExecute(BBI))
+ !isSafeToSpeculativelyExecute(&*BBI))
return false;
}
@@ -643,3 +645,97 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
return !GS.IsCompared;
}
+
+static void collectFuncletMembers(
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
+ const MachineBasicBlock *MBB) {
+ // Add this MBB to our funclet.
+ auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet));
+
+ // Don't revisit blocks.
+ if (!P.second) {
+ assert(P.first->second == Funclet && "MBB is part of two funclets!");
+ return;
+ }
+
+ bool IsReturn = false;
+ int NumTerminators = 0;
+ for (const MachineInstr &MI : MBB->terminators()) {
+ IsReturn |= MI.isReturn();
+ ++NumTerminators;
+ }
+ assert((!IsReturn || NumTerminators == 1) &&
+ "Expected only one terminator when a return is present!");
+
+ // Returns are boundaries where funclet transfer can occur, don't follow
+ // successors.
+ if (IsReturn)
+ return;
+
+ for (const MachineBasicBlock *SMBB : MBB->successors())
+ if (!SMBB->isEHPad())
+ collectFuncletMembers(FuncletMembership, Funclet, SMBB);
+}
+
+DenseMap<const MachineBasicBlock *, int>
+llvm::getFuncletMembership(const MachineFunction &MF) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+
+ // We don't have anything to do if there aren't any EH pads.
+ if (!MF.getMMI().hasEHFunclets())
+ return FuncletMembership;
+
+ int EntryBBNumber = MF.front().getNumber();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
+ SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
+ SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
+ SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry()) {
+ FuncletBlocks.push_back(&MBB);
+ } else if (IsSEH && MBB.isEHPad()) {
+ SEHCatchPads.push_back(&MBB);
+ } else if (MBB.pred_empty()) {
+ UnreachableBlocks.push_back(&MBB);
+ }
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ // CatchPads are not funclets for SEH so do not consider CatchRet to
+ // transfer control to another funclet.
+ if (MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ continue;
+
+ // FIXME: SEH CatchPads are not necessarily in the parent function:
+ // they could be inside a finally block.
+ const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
+ const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
+ CatchRetSuccessors.push_back(
+ {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
+ }
+
+ // We don't have anything to do if there aren't any EH pads.
+ if (FuncletBlocks.empty())
+ return FuncletMembership;
+
+ // Identify all the basic blocks reachable from the function entry.
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
+ // All blocks not part of a funclet are in the parent function.
+ for (const MachineBasicBlock *MBB : UnreachableBlocks)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Next, identify all the blocks inside the funclets.
+ for (const MachineBasicBlock *MBB : FuncletBlocks)
+ collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
+ // SEH CatchPads aren't really funclets, handle them separately.
+ for (const MachineBasicBlock *MBB : SEHCatchPads)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
+ CatchRetSuccessors)
+ collectFuncletMembers(FuncletMembership, CatchRetPair.second,
+ CatchRetPair.first);
+ return FuncletMembership;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 0bad795..ade2d71 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -73,7 +73,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
const Function *Per = nullptr;
if (F->hasPersonalityFn())
Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- assert(!MMI->getPersonality() || Per == MMI->getPersonality());
bool forceEmitPersonality =
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
@@ -115,9 +114,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalValue *>::const_reverse_iterator
- I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalValue *GV = *I;
+ for (const GlobalValue *GV : reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 125047e..be7eafb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -135,11 +135,14 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return *TM.getObjFileLowering();
}
-/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getDataLayout();
+ return MMI->getModule()->getDataLayout();
}
+// Do not use the cached DataLayout because some clients use it without a Module
+// (llvm-dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
+
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
return MF->getSubtarget<MCSubtargetInfo>();
@@ -193,10 +196,18 @@ bool AsmPrinter::doInitialization(Module &M) {
unsigned Major, Minor, Update;
TT.getOSVersion(Major, Minor, Update);
// If there is a version specified, Major will be non-zero.
- if (Major)
- OutStreamer->EmitVersionMin((TT.isMacOSX() ?
- MCVM_OSXVersionMin : MCVM_IOSVersionMin),
- Major, Minor, Update);
+ if (Major) {
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS())
+ VersionType = MCVM_WatchOSVersionMin;
+ else if (TT.isTvOS())
+ VersionType = MCVM_TvOSVersionMin;
+ else if (TT.isMacOSX())
+ VersionType = MCVM_OSXVersionMin;
+ else
+ VersionType = MCVM_IOSVersionMin;
+ OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
+ }
}
// Allow the target to emit any magic that it wants at the start of the file.
@@ -224,28 +235,20 @@ bool AsmPrinter::doInitialization(Module &M) {
TM.getTargetFeatureString()));
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions);
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n",
+ OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
- bool skip_dwarf = false;
- if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
+ if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
- // FIXME: Don't emit DWARF debug info if there's at least one function
- // with AddressSanitizer instrumentation.
- // This is a band-aid fix for PR22032.
- for (auto &F : M.functions()) {
- if (F.hasFnAttribute(Attribute::SanitizeAddress)) {
- skip_dwarf = true;
- break;
- }
- }
}
- if (!skip_dwarf) {
+ if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
}
@@ -340,8 +343,51 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
return TM.getSymbol(GV, *Mang);
}
+static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) {
+ return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName());
+}
+
+static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) {
+ return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName());
+}
+
+/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable.
+void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
+ MCSymbol *EmittedSym,
+ bool AllZeroInitValue) {
+ MCSection *TLSVarSection = getObjFileLowering().getDataSection();
+ OutStreamer->SwitchSection(TLSVarSection);
+ MCSymbol *GVSym = getSymbol(GV);
+ EmitLinkage(GV, EmittedSym); // same linkage as GV
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+ unsigned WordSize = DL.getPointerSize();
+ unsigned Alignment = DL.getPointerABIAlignment();
+ EmitAlignment(Log2_32(Alignment));
+ OutStreamer->EmitLabel(EmittedSym);
+ OutStreamer->EmitIntValue(Size, WordSize);
+ OutStreamer->EmitIntValue((1 << AlignLog), WordSize);
+ OutStreamer->EmitIntValue(0, WordSize);
+ if (GV->hasInitializer() && !AllZeroInitValue) {
+ OutStreamer->EmitSymbolValue(
+ getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize);
+ } else
+ OutStreamer->EmitIntValue(0, WordSize);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym),
+ MCConstantExpr::create(4 * WordSize, OutContext));
+ OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable.
+}
+
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ bool IsEmuTLSVar =
+ GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal &&
+ TM.Options.EmulatedTLS;
+ assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
+ "No emulated TLS variables in the common section");
+
if (GV->hasInitializer()) {
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(GV))
@@ -352,7 +398,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GlobalGOTEquivs.count(getSymbol(GV)))
return;
- if (isVerbose()) {
+ if (isVerbose() && !IsEmuTLSVar) {
+ // When printing the control variable __emutls_v.*,
+ // we don't need to print the original TLS variable name.
GV->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, GV->getParent());
OutStreamer->GetCommentOS() << '\n';
@@ -360,7 +408,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
MCSymbol *GVSym = getSymbol(GV);
- EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+ MCSymbol *EmittedSym = IsEmuTLSVar ?
+ getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym;
+ // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes.
+ // GV's or GVSym's attributes will be used for the EmittedSym.
+
+ EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
return;
@@ -371,17 +424,29 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
"' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getDataLayout();
- uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
+ unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+
+ bool AllZeroInitValue = false;
+ const Constant *InitValue = GV->getInitializer();
+ if (isa<ConstantAggregateZero>(InitValue))
+ AllZeroInitValue = true;
+ else {
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ if (InitIntValue && InitIntValue->isZero())
+ AllZeroInitValue = true;
+ }
+ if (IsEmuTLSVar)
+ EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue);
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -390,6 +455,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ assert(!(IsEmuTLSVar && GVKind.isCommon()) &&
+ "No emulated TLS variables in the common section");
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
unsigned Align = 1 << AlignLog;
@@ -434,12 +501,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- MCSection *TheSection =
+ if (IsEmuTLSVar && AllZeroInitValue)
+ return; // No need of initialization values.
+
+ MCSymbol *EmittedInitSym = IsEmuTLSVar ?
+ getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym;
+ // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes.
+ // GV's or GVSym's attributes will be used for the EmittedInitSym.
+
+ MCSection *TheSection = IsEmuTLSVar ?
+ getObjFileLowering().getReadOnlySection() :
getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
// Handle the zerofill directive on darwin, which is a special form of BSS
// emission.
- if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) {
if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
// .globl _foo
@@ -459,7 +535,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// TLOF class. This will also make it more obvious that stuff like
// MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
// specific code.
- if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) {
// Emit the .tbss symbol
MCSymbol *MangSym =
OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
@@ -473,7 +549,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitAlignment(AlignLog, GV);
OutStreamer->EmitLabel(MangSym);
- EmitGlobalConstant(GV->getInitializer());
+ EmitGlobalConstant(GV->getParent()->getDataLayout(),
+ GV->getInitializer());
}
OutStreamer->AddBlankLine();
@@ -490,7 +567,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
+ unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
OutStreamer->EmitIntValue(0, PtrSize);
@@ -502,16 +579,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TheSection);
- EmitLinkage(GV, GVSym);
+ // emutls_t.* symbols are only used in the current compilation unit.
+ if (!IsEmuTLSVar)
+ EmitLinkage(GV, EmittedInitSym);
EmitAlignment(AlignLog, GV);
- OutStreamer->EmitLabel(GVSym);
+ OutStreamer->EmitLabel(EmittedInitSym);
- EmitGlobalConstant(GV->getInitializer());
+ EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym),
MCConstantExpr::create(Size, OutContext));
OutStreamer->AddBlankLine();
@@ -545,7 +624,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit the prefix data.
if (F->hasPrefixData())
- EmitGlobalConstant(F->getPrefixData());
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
@@ -580,7 +659,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit the prologue data.
if (F->hasPrologueData())
- EmitGlobalConstant(F->getPrologueData());
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -640,19 +719,27 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer->AddComment(Twine("implicit-def: ") +
- MMI->getContext().getRegisterInfo()->getName(RegNo));
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ OutStreamer->AddComment(OS.str());
OutStreamer->AddBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
- std::string Str = "kill:";
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "kill:";
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
- Str += ' ';
- Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg());
- Str += (Op.isDef() ? "<def>" : "<kill>");
+ OS << ' '
+ << PrintReg(Op.getReg(),
+ AP.MF->getSubtarget().getRegisterInfo())
+ << (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer->AddComment(Str);
AP.OutStreamer->AddBlankLine();
@@ -688,6 +775,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
+ for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
+ if (Deref) {
+ // We currently don't support extra Offsets or derefs after the first
+ // one. Bail out early instead of emitting an incorrect comment
+ OS << " [complex expression]";
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+ }
+ uint64_t Op = Expr->getElement(i);
+ if (Op == dwarf::DW_OP_deref) {
+ Deref = true;
+ continue;
+ } else if (Op == dwarf::DW_OP_bit_piece) {
+ // There can't be any operands after this in a valid expression
+ break;
+ }
+ uint64_t ExtraOffset = Expr->getElement(i++);
+ if (Op == dwarf::DW_OP_plus)
+ Offset += ExtraOffset;
+ else {
+ assert(Op == dwarf::DW_OP_minus);
+ Offset -= ExtraOffset;
+ }
+ }
+
// Register or immediate value. Register 0 means undef.
if (MI->getOperand(0).isFPImm()) {
APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
@@ -727,7 +839,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (Deref)
OS << '[';
- OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg);
+ OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
if (Deref)
@@ -888,7 +1000,7 @@ void AsmPrinter::EmitFunctionBody() {
EmitFunctionBodyEnd();
if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MAI->hasDotTypeDotSizeDirective()) {
+ MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -1047,20 +1159,17 @@ bool AsmPrinter::doFinalization(Module &M) {
// Output stubs for external and common global variables.
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
- OutStreamer->SwitchSection(TLOF.getDataRelSection());
- const DataLayout *DL = TM.getDataLayout();
+ OutStreamer->SwitchSection(TLOF.getDataSection());
+ const DataLayout &DL = M.getDataLayout();
for (const auto &Stub : Stubs) {
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
- DL->getPointerSize());
+ DL.getPointerSize());
}
}
}
- // Make sure we wrote out everything we need.
- OutStreamer->Flush();
-
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
@@ -1103,10 +1212,29 @@ bool AsmPrinter::doFinalization(Module &M) {
else
assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
+ // Set the symbol type to function if the alias has a function type.
+ // This affects codegen when the aliasee is not a function.
+ if (Alias.getType()->getPointerElementType()->isFunctionTy())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+
EmitVisibility(Name, Alias.getVisibility());
// Emit the directives as assignments aka .set:
OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
+
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = Alias.getBaseObject();
+ if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(Alias.getValueType());
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
+ MCConstantExpr::create(Size, OutContext));
+ }
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -1120,16 +1248,16 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
- MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
- /*C=*/nullptr);
+ MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), SectionKind::getReadOnly(),
+ /*C=*/nullptr);
OutStreamer->SwitchSection(ReadOnlySection);
MCSymbol *AddrSymbol =
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
OutStreamer->EmitLabel(AddrSymbol);
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ unsigned PtrSize = M.getDataLayout().getPointerSize(0);
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@@ -1169,7 +1297,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- NeedsLocalForSize) {
+ MMI->hasEHFunclets() || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1206,14 +1334,14 @@ void AsmPrinter::EmitConstantPool() {
const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
- SectionKind Kind =
- CPE.getSectionKind(TM.getDataLayout());
+ SectionKind Kind = CPE.getSectionKind(&getDataLayout());
const Constant *C = nullptr;
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
+ MCSection *S =
+ getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C);
// The number of sections are small, just do a linear search from the
// last section to the first.
@@ -1260,14 +1388,13 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer->EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
- Offset = NewOffset +
- TM.getDataLayout()->getTypeAllocSize(Ty);
+ Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
OutStreamer->EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
- EmitGlobalConstant(CPE.Val.ConstVal);
+ EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
}
}
}
@@ -1276,7 +1403,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getTarget().getDataLayout();
+ const DataLayout &DL = MF->getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1296,8 +1423,7 @@ void AsmPrinter::EmitJumpTableInfo() {
OutStreamer->SwitchSection(ReadOnlySection);
}
- EmitAlignment(Log2_32(
- MJTI->getEntryAlignment(*TM.getDataLayout())));
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -1335,7 +1461,7 @@ void AsmPrinter::EmitJumpTableInfo() {
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
- if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix())
+ if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
// FIXME: This doesn't have to have any specific name, just any randomly
// named and numbered 'l' label would work. Simplify GetJTISymbol.
OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
@@ -1409,8 +1535,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
- unsigned EntrySize =
- MJTI->getEntrySize(*TM.getDataLayout());
+ unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
OutStreamer->EmitValue(Value, EntrySize);
}
@@ -1435,7 +1560,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
if (GV->getName() == "llvm.global_ctors") {
- EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1447,7 +1573,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
if (GV->getName() == "llvm.global_dtors") {
- EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1485,7 +1612,8 @@ struct Structor {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
+void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1520,8 +1648,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getDataLayout();
- unsigned Align = Log2_32(DL->getPointerPrefAlignment());
+ unsigned Align = Log2_32(DL.getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
const Structor &R) { return L.Priority < R.Priority; });
@@ -1542,7 +1669,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
OutStreamer->SwitchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
EmitAlignment(Align);
- EmitXXStructor(S.Func);
+ EmitXXStructor(DL, S.Func);
}
}
@@ -1621,8 +1748,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(),
- NumBits);
+ NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1668,7 +1794,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -1682,11 +1808,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getDataLayout();
-
// Generate a symbolic expression for the byte address
- APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
+ APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0));
if (!OffsetAI)
@@ -1707,7 +1831,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return lowerConstant(CE->getOperand(0));
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
@@ -1718,7 +1842,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
@@ -1769,10 +1893,13 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
}
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
+ AsmPrinter &AP,
const Constant *BaseCV = nullptr,
uint64_t Offset = 0);
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
+
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
@@ -1789,9 +1916,9 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
-static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType());
+ uint64_t Size = DL.getTypeAllocSizeInBits(V->getType());
assert(Size % 8 == 0);
// Extend the element to take zero padding into account.
@@ -1806,7 +1933,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
// byte.
assert(CA->getNumOperands() != 0 && "Should be a CAZ");
Constant *Op0 = CA->getOperand(0);
- int Byte = isRepeatedByteSequence(Op0, TM);
+ int Byte = isRepeatedByteSequence(Op0, DL);
if (Byte == -1)
return -1;
@@ -1823,15 +1950,14 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
return -1;
}
-static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
- AsmPrinter &AP){
+static void emitGlobalConstantDataSequential(const DataLayout &DL,
+ const ConstantDataSequential *CDS,
+ AsmPrinter &AP) {
// See if we can aggregate this into a .fill, if so, emit it as such.
- int Value = isRepeatedByteSequence(CDS, AP.TM);
+ int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
- uint64_t Bytes =
- AP.TM.getDataLayout()->getTypeAllocSize(
- CDS->getType());
+ uint64_t Bytes = DL.getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer->EmitFill(Bytes, Value);
@@ -1851,37 +1977,11 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
- } else if (ElementByteSize == 4) {
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- assert(CDS->getElementType()->isFloatTy());
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union {
- float F;
- uint32_t I;
- };
-
- F = CDS->getElementAsFloat(i);
- if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << "float " << F << '\n';
- AP.OutStreamer->EmitIntValue(I, 4);
- }
} else {
- assert(CDS->getElementType()->isDoubleTy());
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union {
- double F;
- uint64_t I;
- };
-
- F = CDS->getElementAsDouble(i);
- if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << "double " << F << '\n';
- AP.OutStreamer->EmitIntValue(I, 8);
- }
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP);
}
- const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1890,12 +1990,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+static void emitGlobalConstantArray(const DataLayout &DL,
+ const ConstantArray *CA, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
- int Value = isRepeatedByteSequence(CA, AP.TM);
- const DataLayout &DL = *AP.TM.getDataLayout();
+ int Value = isRepeatedByteSequence(CA, DL);
if (Value != -1) {
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
@@ -1903,17 +2003,17 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset);
+ emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
}
}
}
-static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
+static void emitGlobalConstantVector(const DataLayout &DL,
+ const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- emitGlobalConstantImpl(CV->getOperand(i), AP);
+ emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1921,21 +2021,21 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
AP.OutStreamer->EmitZeros(Padding);
}
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+static void emitGlobalConstantStruct(const DataLayout &DL,
+ const ConstantStruct *CS, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getDataLayout();
- unsigned Size = DL->getTypeAllocSize(CS->getType());
- const StructLayout *Layout = DL->getStructLayout(CS->getType());
+ unsigned Size = DL.getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = DL.getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
// Print the actual field value.
- emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar);
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
// Check if padding is needed and insert one or more 0s.
- uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
+ uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
@@ -1974,8 +2074,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() &&
- !CFP->getType()->isPPC_FP128Ty()) {
+ if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
@@ -1993,13 +2092,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -2016,7 +2115,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Big endian:
// * Record the extra bits to emit.
// * Realign the raw data to emit the chunks of 64-bits.
- if (DL->isBigEndian()) {
+ if (DL.isBigEndian()) {
// Basically the structure of the raw data is a chunk of 64-bits cells:
// 0 1 BitWidth / 64
// [chunk1][chunk2] ... [chunkN].
@@ -2037,7 +2136,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// quantities at a time.
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i];
AP.OutStreamer->EmitIntValue(Val, 8);
}
@@ -2045,8 +2144,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Emit the extra bits after the 64-bits chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(
- CI->getType());
+ uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
@@ -2094,7 +2192,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
return;
- const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst);
+ const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst);
if (!BaseGV)
return;
@@ -2149,10 +2247,10 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
}
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
- const DataLayout *DL = AP.TM.getDataLayout();
- uint64_t Size = DL->getTypeAllocSize(CV->getType());
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
+ AsmPrinter &AP, const Constant *BaseCV,
+ uint64_t Offset) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
// Globals with sub-elements such as combinations of arrays and structs
// are handled recursively by emitGlobalConstantImpl. Keep track of the
@@ -2189,32 +2287,32 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
- return emitGlobalConstantDataSequential(CDS, AP);
+ return emitGlobalConstantDataSequential(DL, CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(CVA, AP, BaseCV, Offset);
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset);
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
// vectors).
if (CE->getOpcode() == Instruction::BitCast)
- return emitGlobalConstantImpl(CE->getOperand(0), AP);
+ return emitGlobalConstantImpl(DL, CE->getOperand(0), AP);
if (Size > 8) {
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, *DL);
+ Constant *New = ConstantFoldConstantExpression(CE, DL);
if (New && New != CE)
- return emitGlobalConstantImpl(New, AP);
+ return emitGlobalConstantImpl(DL, New, AP);
}
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return emitGlobalConstantVector(V, AP);
+ return emitGlobalConstantVector(DL, V, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
@@ -2230,11 +2328,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
- uint64_t Size =
- TM.getDataLayout()->getTypeAllocSize(CV->getType());
+void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
if (Size)
- emitGlobalConstantImpl(CV, *this);
+ emitGlobalConstantImpl(DL, CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
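With the DataLayout now passed in explicitly, callers supply it at the point of emission instead of reaching through TM. A minimal sketch of a call site, assuming the global's module DataLayout is the appropriate one:
  // Sketch only: emit the initializer of a global using its module's layout.
  static void emitInit(AsmPrinter &AP, const GlobalVariable &GV) {
    const DataLayout &DL = GV.getParent()->getDataLayout();
    AP.EmitGlobalConstant(DL, GV.getInitializer()); // assumes GV.hasInitializer()
  }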
@@ -2272,10 +2369,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.getOrCreateSymbol
- (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
- + "_" + Twine(CPID));
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "CPI" + Twine(getFunctionNumber()) + "_" +
+ Twine(CPID));
}
/// GetJTISymbol - Return the symbol for the specified jump table entry.
@@ -2286,10 +2383,10 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.getOrCreateSymbol
- (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
- Twine(UID) + "_set_" + Twine(MBBID));
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
}
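Both helpers derive their names from the DataLayout's private-global prefix; for example, assuming an ELF-style ".L" prefix and function number 3:
  //   GetCPISymbol(7)       -> ".LCPI3_7"
  //   GetJTSetSymbol(2, 5)  -> ".L3_2_set_5"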
MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
@@ -2301,7 +2398,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
/// Return the MCSymbol for the specified ExternalSymbol.
MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
SmallString<60> NameStr;
- Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout());
+ Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout());
return OutContext.getOrCreateSymbol(NameStr);
}
@@ -2376,6 +2473,14 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ // End the previous funclet and start a new one.
+ if (MBB.isEHFuncletEntry()) {
+ for (const HandlerInfo &HI : Handlers) {
+ HI.Handler->endFunclet();
+ HI.Handler->beginFunclet(MBB);
+ }
+ }
+
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB.getAlignment())
EmitAlignment(Align);
@@ -2389,20 +2494,28 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
if (isVerbose())
OutStreamer->AddComment("Block address taken");
- for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
- OutStreamer->EmitLabel(Sym);
+ // MBBs can have their address taken as part of CodeGen without having
+ // their corresponding BB's address taken in IR
+ if (BB->hasAddressTaken())
+ for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
+ OutStreamer->EmitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB.getBasicBlock())
- if (BB->hasName())
- OutStreamer->AddComment("%" + BB->getName());
+ if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ BB->printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, BB->getModule());
+ OutStreamer->GetCommentOS() << '\n';
+ }
+ }
emitBasicBlockLoopComments(MBB, LI, *this);
}
// Print the main label for the block.
- if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) {
+ if (MBB.pred_empty() ||
+ (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
@@ -2440,7 +2553,7 @@ bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
- if (MBB->isLandingPad() || MBB->pred_empty())
+ if (MBB->isEHPad() || MBB->pred_empty())
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ad180b6..504c5d2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -47,7 +47,7 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
OutStreamer->EmitSLEB128IntValue(Value);
}
-/// EmitULEB128 - emit the specified signed leb128 value.
+/// EmitULEB128 - emit the specified unsigned leb128 value.
void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
unsigned PadTo) const {
if (isVerbose() && Desc)
@@ -56,18 +56,6 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
OutStreamer->EmitULEB128IntValue(Value, PadTo);
}
-/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
-void AsmPrinter::EmitCFAByte(unsigned Val) const {
- if (isVerbose()) {
- if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64)
- OutStreamer->AddComment("DW_CFA_offset + Reg (" +
- Twine(Val - dwarf::DW_CFA_offset) + ")");
- else
- OutStreamer->AddComment(dwarf::CallFrameString(Val));
- }
- OutStreamer->EmitIntValue(Val, 1);
-}
-
static const char *DecodeDWARFEncoding(unsigned Encoding) {
switch (Encoding) {
case dwarf::DW_EH_PE_absptr:
@@ -134,7 +122,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getDataLayout()->getPointerSize();
+ return MF->getDataLayout().getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -228,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpDefCfaOffset:
OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+ break;
case MCCFIInstruction::OpDefCfa:
OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
@@ -246,6 +237,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpSameValue:
OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
+ case MCCFIInstruction::OpGnuArgsSize:
+ OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpEscape:
+ OutStreamer->EmitCFIEscape(Inst.getValues());
+ break;
}
}
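The new cases simply forward additional CFI directives to the streamer. A hedged sketch of where such an instruction might originate, assuming the existing MCCFIInstruction factory helpers:
  // Sketch: record a GNU args-size CFI for a call site; when the printer
  // reaches it, the OpGnuArgsSize case above forwards it through
  // OutStreamer->EmitCFIGnuArgsSize(16).
  MCCFIInstruction ArgsSize = MCCFIInstruction::createGnuArgsSize(nullptr, 16);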
@@ -284,17 +281,10 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
}
}
-void
-AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbrevs) {
- // Emit the abbrevations code (base 1 index.)
- EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(this);
- }
+void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const {
+ // Emit the abbreviations code (base 1 index.)
+ EmitULEB128(Abbrev.getNumber(), "Abbreviation Code");
- // Mark end of abbreviations.
- EmitULEB128(0, "EOM(3)");
+ // Emit the abbreviations data.
+ Abbrev.Emit(this);
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index f1efe9d..e59961f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -19,6 +19,7 @@
namespace llvm {
+class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MCSymbol;
@@ -50,6 +51,11 @@ public:
/// beginFunction at all.
virtual void endFunction(const MachineFunction *MF) = 0;
+ /// \brief Emit target-specific EH funclet machinery.
+ virtual void beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym = nullptr) {}
+ virtual void endFunclet() {}
+
/// \brief Process beginning of an instruction.
virtual void beginInstruction(const MachineInstr *MI) = 0;
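Handlers that care about funclets override the new hooks; everything else inherits the empty defaults. A minimal sketch of such a handler (names illustrative, remaining pure-virtual methods omitted for brevity):
  // Illustrative subclass: track funclet boundaries for a table emitter.
  class FuncletAwareHandler : public AsmPrinterHandler {
  public:
    void beginFunclet(const MachineBasicBlock &MBB,
                      MCSymbol *Sym = nullptr) override {
      // Start a new per-funclet table keyed on MBB / Sym.
    }
    void endFunclet() override {
      // Flush the table for the funclet that just ended.
    }
    // ... the remaining pure-virtual AsmPrinterHandler methods still need
    // real implementations in a concrete handler.
  };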
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 793e629..4171657 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -127,19 +127,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
- // Create a temporary copy of the original STI because the parser may modify
- // it. For example, when switching between arm and thumb mode. If the target
- // needs to emit code to return to the original state it can do so in
- // emitInlineAsmEnd().
- MCSubtargetInfo TmpSTI = STI;
-
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
// we only need MCInstrInfo for asm parsing. We create one unconditionally
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
- TmpSTI, *Parser, *MII, MCOptions));
+ STI, *Parser, *MII, MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -154,7 +148,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
- emitInlineAsmEnd(STI, &TmpSTI);
+ emitInlineAsmEnd(STI, &TAP->getSTI());
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
@@ -512,9 +506,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getDataLayout();
if (!strcmp(Code, "private")) {
- OS << DL->getPrivateGlobalPrefix();
+ const DataLayout &DL = MF->getDataLayout();
+ OS << DL.getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
OS << MAI->getCommentString();
} else if (!strcmp(Code, "uid")) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 0cc829f..df1997b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -24,16 +24,19 @@
namespace llvm {
class ByteStreamer {
- public:
- virtual ~ByteStreamer() {}
+ protected:
+ ~ByteStreamer() = default;
+ ByteStreamer(const ByteStreamer&) = default;
+ ByteStreamer() = default;
+ public:
// For now we're just handling the calls we need for dwarf emission/hashing.
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
};
-class APByteStreamer : public ByteStreamer {
+class APByteStreamer final : public ByteStreamer {
private:
AsmPrinter &AP;
@@ -53,7 +56,7 @@ public:
}
};
-class HashingByteStreamer : public ByteStreamer {
+class HashingByteStreamer final : public ByteStreamer {
private:
DIEHash &Hash;
public:
@@ -69,7 +72,7 @@ class HashingByteStreamer : public ByteStreamer {
}
};
-class BufferByteStreamer : public ByteStreamer {
+class BufferByteStreamer final : public ByteStreamer {
private:
SmallVectorImpl<char> &Buffer;
SmallVectorImpl<std::string> &Comments;
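With the destructor made protected and non-virtual, a ByteStreamer is only used through a reference to one of the final subclasses and never deleted through the base pointer. A short sketch of that usage pattern, with an illustrative helper name:
  // Sketch: generic emission code takes the abstract interface by reference;
  // the concrete (final) streamer lives on the caller's stack.
  static void emitDerefOp(ByteStreamer &BS) {
    BS.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
  }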
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 46dbc76..bf794f7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -86,7 +86,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
AP->EmitULEB128(0, "EOM(2)");
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEAbbrev::print(raw_ostream &O) {
O << "Abbreviation @"
<< format("0x%lx", (long)(intptr_t)this)
@@ -104,12 +104,13 @@ void DIEAbbrev::print(raw_ostream &O) {
<< '\n';
}
}
+
+LLVM_DUMP_METHOD
void DIEAbbrev::dump() { print(dbgs()); }
-#endif
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
- for (const DIEValue &V : Values)
+ for (const DIEValue &V : values())
Abbrev.AddAttribute(V.getAttribute(), V.getForm());
return Abbrev;
}
@@ -144,36 +145,35 @@ DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
return DIEValue();
}
-#ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IndentCount) const {
- const std::string Indent(IndentCount, ' ');
- bool isBlock = getTag() == 0;
-
- if (!isBlock) {
- O << Indent
- << "Die: "
- << format("0x%lx", (long)(intptr_t)this)
- << ", Offset: " << Offset
- << ", Size: " << Size << "\n";
-
- O << Indent
- << dwarf::TagString(getTag())
- << " "
- << dwarf::ChildrenString(hasChildren()) << "\n";
- } else {
- O << "Size: " << Size << "\n";
- }
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+ StringRef Type, unsigned Size, unsigned IndentCount) {
+ O << Type << ": Size: " << Size << "\n";
- IndentCount += 2;
unsigned I = 0;
- for (const auto &V : Values) {
+ const std::string Indent(IndentCount, ' ');
+ for (const auto &V : Values.values()) {
O << Indent;
+ O << "Blk[" << I++ << "]";
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+}
- if (!isBlock)
- O << dwarf::AttributeString(V.getAttribute());
- else
- O << "Blk[" << I++ << "]";
+LLVM_DUMP_METHOD
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
+ const std::string Indent(IndentCount, ' ');
+ O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+ << ", Offset: " << Offset << ", Size: " << Size << "\n";
+ O << Indent << dwarf::TagString(getTag()) << " "
+ << dwarf::ChildrenString(hasChildren()) << "\n";
+
+ IndentCount += 2;
+ for (const auto &V : values()) {
+ O << Indent;
+ O << dwarf::AttributeString(V.getAttribute());
O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
V.print(O);
O << "\n";
@@ -183,13 +183,13 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
for (const auto &Child : children())
Child.print(O, IndentCount + 4);
- if (!isBlock) O << "\n";
+ O << "\n";
}
+LLVM_DUMP_METHOD
void DIE::dump() {
print(dbgs());
}
-#endif
void DIEValue::EmitValue(const AsmPrinter *AP) const {
switch (Ty) {
@@ -215,7 +215,7 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
llvm_unreachable("Unknown DIE kind");
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEValue::print(raw_ostream &O) const {
switch (Ty) {
case isNone:
@@ -228,10 +228,10 @@ void DIEValue::print(raw_ostream &O) const {
}
}
+LLVM_DUMP_METHOD
void DIEValue::dump() const {
print(dbgs());
}
-#endif
//===----------------------------------------------------------------------===//
// DIEInteger Implementation
@@ -264,7 +264,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
- Size = Asm->getDataLayout().getPointerSize(); break;
+ Size = Asm->getPointerSize();
+ break;
case dwarf::DW_FORM_ref_addr:
Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
break;
@@ -294,21 +295,21 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_addr:
+ return AP->getPointerSize();
case dwarf::DW_FORM_ref_addr:
if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
return sizeof(int32_t);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEInteger::print(raw_ostream &O) const {
O << "Int: " << (int64_t)Integer << " 0x";
O.write_hex(Integer);
}
-#endif
//===----------------------------------------------------------------------===//
// DIEExpr Implementation
@@ -326,12 +327,11 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
-#endif
//===----------------------------------------------------------------------===//
// DIELabel Implementation
@@ -352,12 +352,11 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
-#endif
//===----------------------------------------------------------------------===//
// DIEDelta Implementation
@@ -375,14 +374,13 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEDelta::print(raw_ostream &O) const {
O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
}
-#endif
//===----------------------------------------------------------------------===//
// DIEString Implementation
@@ -431,11 +429,10 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return DIEInteger(S.getOffset()).SizeOf(AP, Form);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEString::print(raw_ostream &O) const {
O << "String: " << S.getString();
}
-#endif
//===----------------------------------------------------------------------===//
// DIEEntry Implementation
@@ -472,15 +469,14 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
const DwarfDebug *DD = AP->getDwarfDebug();
assert(DD && "Expected Dwarf Debug info to be available");
if (DD->getDwarfVersion() == 2)
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
return sizeof(int32_t);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEEntry::print(raw_ostream &O) const {
O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
}
-#endif
//===----------------------------------------------------------------------===//
// DIETypeSignature Implementation
@@ -491,11 +487,10 @@ void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIETypeSignature::print(raw_ostream &O) const {
O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
}
-#endif
//===----------------------------------------------------------------------===//
// DIELoc Implementation
@@ -505,7 +500,7 @@ void DIETypeSignature::print(raw_ostream &O) const {
///
unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- for (const auto &V : Values)
+ for (const auto &V : values())
Size += V.SizeOf(AP);
}
@@ -525,7 +520,7 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
Asm->EmitULEB128(Size); break;
}
- for (const auto &V : Values)
+ for (const auto &V : values())
V.EmitValue(Asm);
}
@@ -543,12 +538,10 @@ unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELoc::print(raw_ostream &O) const {
- O << "ExprLoc: ";
- DIE::print(O, 5);
+ printValues(O, *this, "ExprLoc", Size, 5);
}
-#endif
//===----------------------------------------------------------------------===//
// DIEBlock Implementation
@@ -558,7 +551,7 @@ void DIELoc::print(raw_ostream &O) const {
///
unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- for (const auto &V : Values)
+ for (const auto &V : values())
Size += V.SizeOf(AP);
}
@@ -576,7 +569,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
}
- for (const auto &V : Values)
+ for (const auto &V : values())
V.EmitValue(Asm);
}
@@ -592,12 +585,10 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEBlock::print(raw_ostream &O) const {
- O << "Blk: ";
- DIE::print(O, 5);
+ printValues(O, *this, "Blk", Size, 5);
}
-#endif
//===----------------------------------------------------------------------===//
// DIELocList Implementation
@@ -608,7 +599,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
/// EmitValue - Emit label value.
@@ -619,6 +610,5 @@ void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
-#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5e60156..0201065 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -470,38 +470,6 @@ void DIEHash::computeHash(const DIE &Die) {
}
/// This is based on the type signature computation given in section 7.27 of the
-/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
-/// with the exception that we are hashing only the context and the name of the
-/// type.
-uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
-
- // Add the contexts to the hash. We won't be computing the ODR hash for
- // function local types so it's safe to use the generic context hashing
- // algorithm here.
- // FIXME: If we figure out how to account for linkage in some way we could
- // actually do this with a slight modification to the parent hash algorithm.
- if (const DIE *Parent = Die.getParent())
- addParentContext(*Parent);
-
- // Add the current DIE information.
-
- // Add the DWARF tag of the DIE.
- addULEB128(Die.getTag());
-
- // Add the name of the type to the hash.
- addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
-
- // Now get the result.
- MD5::MD5Result Result;
- Hash.final(Result);
-
- // ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
-}
-
-/// This is based on the type signature computation given in section 7.27 of the
/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
/// with the inclusion of the full CU and all top level CU entities.
// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 833ca02..44f0ce8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -84,9 +84,6 @@ class DIEHash {
public:
DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
- /// \brief Computes the ODR signature.
- uint64_t computeDIEODRSignature(const DIE &Die);
-
/// \brief Computes the CU signature.
uint64_t computeCUSignature(const DIE &Die);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index afffa83..bbe5324 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,6 +9,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -17,7 +19,6 @@
namespace llvm {
class AsmPrinter;
-class DebugLocStream;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index f8cdde2..4ad3e18 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -41,7 +41,7 @@ void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
}
-void DwarfAccelTable::ComputeBucketCount(void) {
+void DwarfAccelTable::ComputeBucketCount() {
// First get the number of unique hashes.
std::vector<uint32_t> uniques(Data.size());
for (size_t i = 0, e = Data.size(); i < e; ++i)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2c212c7..6665c16 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -78,12 +78,11 @@ void DwarfCFIException::endModule() {
return;
// Emit references to all used personality functions
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
- for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
- if (!Personalities[i])
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (!Personality)
continue;
- MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
- TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym);
+ MCSymbol *Sym = Asm->getSymbol(Personality);
+ TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
}
}
@@ -108,7 +107,6 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const Function *Per = nullptr;
if (F->hasPersonalityFn())
Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- assert(!MMI->getPersonality() || Per == MMI->getPersonality());
// Emit a personality function even when there are no landing pads
bool forceEmitPersonality =
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index fc54a29..725063a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -151,28 +151,33 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
} else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+ ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
- // 3) followed by an OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
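For a non-emulated TLS global on the non-split-DWARF path above, the resulting location expression is roughly the following (a sketch, assuming an 8-byte pointer size):
  //   DW_OP_const8u <offset of Sym within the module's TLS block>,
  //   then DW_OP_GNU_push_tls_address when tuning for GDB (or DWARF < 3),
  //   or the standard DW_OP_form_tls_address otherwise.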
@@ -338,9 +343,9 @@ void DwarfCompileUnit::constructScopeDIE(
// Skip imported directives in gmlt-like data.
if (!includeMinimalInlineScopes()) {
// There is no need to emit empty lexical block DIE.
- for (const auto &E : DD->findImportedEntitiesForScope(DS))
+ for (const auto *IE : ImportedEntities[DS])
Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(E.second)));
+ constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
}
// If there are only other scopes as children, put them directly in the
@@ -435,6 +440,9 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ if (IA->getDiscriminator())
+ addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+ IA->getDiscriminator());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -517,8 +525,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- assert(Expr != DV.getExpression().end() &&
- "Wrong number of expressions");
+ assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
++Expr;
@@ -597,8 +604,8 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
return ObjectPointer;
}
-void
-DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+ LexicalScope *Scope) {
DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 509c943..2e28467 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -39,6 +39,12 @@ class DwarfCompileUnit : public DwarfUnit {
/// The start of the unit within its section.
MCSymbol *LabelBegin;
+ typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
+ typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
+ ImportedEntityMap;
+
+ ImportedEntityMap ImportedEntities;
+
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
@@ -98,6 +104,10 @@ public:
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+ void addImportedEntity(const DIImportedEntity* IE) {
+ ImportedEntities[IE->getScope()].push_back(IE);
+ }
+
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7d03a39..3466f34 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -104,6 +105,14 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
+static cl::opt<DefaultOnOff>
+DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+ cl::desc("Emit DWARF linkage-name attributes."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
+
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
@@ -176,9 +185,9 @@ const DIType *DbgVariable::getType() const {
if (tag == dwarf::DW_TAG_pointer_type)
subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
- auto Elements = cast<DICompositeTypeBase>(subType)->getElements();
+ auto Elements = cast<DICompositeType>(subType)->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedTypeBase>(Elements[i]);
+ auto *DT = cast<DIDerivedType>(Elements[i]);
if (getName() == DT->getName())
return resolve(DT->getBaseType());
}
@@ -194,45 +203,67 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
- UsedNonDefaultText(false),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
- IsPS4(Triple(A->getTargetTriple()).isPS4()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
- AccelTypes(TypeAtoms) {
+ AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
CurFn = nullptr;
CurMI = nullptr;
+ Triple TT(Asm->getTargetTriple());
+
+ // Make sure we know our "debugger tuning." The target option takes
+ // precedence; fall back to triple-based defaults.
+ if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+ DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+ else if (IsDarwin || TT.isOSFreeBSD())
+ DebuggerTuning = DebuggerKind::LLDB;
+ else if (TT.isPS4CPU())
+ DebuggerTuning = DebuggerKind::SCE;
+ else
+ DebuggerTuning = DebuggerKind::GDB;
- // Turn on accelerator tables for Darwin by default, pubnames by
- // default for non-Darwin/PS4, and handle split dwarf.
+ // Turn on accelerator tables for LLDB by default.
if (DwarfAccelTables == Default)
- HasDwarfAccelTables = IsDarwin;
+ HasDwarfAccelTables = tuneForLLDB();
else
HasDwarfAccelTables = DwarfAccelTables == Enable;
+ // Handle split DWARF. Off by default for now.
if (SplitDwarf == Default)
HasSplitDwarf = false;
else
HasSplitDwarf = SplitDwarf == Enable;
+ // Pubnames/pubtypes on by default for GDB.
if (DwarfPubSections == Default)
- HasDwarfPubSections = !IsDarwin && !IsPS4;
+ HasDwarfPubSections = tuneForGDB();
else
HasDwarfPubSections = DwarfPubSections == Enable;
+ // SCE does not use linkage names.
+ if (DwarfLinkageNames == Default)
+ UseLinkageNames = !tuneForSCE();
+ else
+ UseLinkageNames = DwarfLinkageNames == Enable;
+
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ // Use dwarf 4 by default if nothing is requested.
+ DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
- // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
- // Everybody else uses GNU's.
- UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+ // Work around a GDB bug. GDB doesn't support the standard opcode;
+ // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+ // is defined as of DWARF 3.
+ // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+ UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
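Taken together with the flag handling above, the tuning choice drives these defaults (assuming the command-line options are left at Default and the DWARF version is 3 or later):
  Tuning   Accel tables   Pub sections   Linkage names   TLS opcode
  GDB      off            on             on              DW_OP_GNU_push_tls_address
  LLDB     on             off            on              DW_OP_form_tls_address
  SCE      off            off            off             DW_OP_form_tls_address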
@@ -300,18 +331,6 @@ void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
}
}
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
- if (!Context)
- return false;
- if (isa<DISubprogram>(Context))
- return true;
- if (auto *T = dyn_cast<DIType>(Context))
- return isSubprogramContext(resolve(T->getScope()));
- return false;
-}
-
/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -416,6 +435,16 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
else
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ if (DIUnit->getDWOId()) {
+ // This CU is either a clang module DWO or a skeleton CU.
+ NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+ DIUnit->getDWOId());
+ if (!DIUnit->getSplitDebugFilename().empty())
+ // This is a prefabricated skeleton CU.
+ NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit->getSplitDebugFilename());
+ }
+
CUMap.insert(std::make_pair(DIUnit, &NewCU));
CUDieMap.insert(std::make_pair(&Die, &NewCU));
return NewCU;
@@ -436,8 +465,6 @@ void DwarfDebug::beginModule() {
const Module *M = MMI->getModule();
- FunctionDIs = makeSubprogramMap(*M);
-
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return;
@@ -449,12 +476,7 @@ void DwarfDebug::beginModule() {
auto *CUNode = cast<DICompileUnit>(N);
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
- ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
- // Stable sort to preserve the order of appearance of imported entities.
- // This is to avoid out-of-order processing of interdependent declarations
- // within the same scope, e.g. { namespace A = base; namespace B = A; }
- std::stable_sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), less_first());
+ CU.addImportedEntity(IE);
for (auto *GV : CUNode->getGlobalVariables())
CU.getOrCreateGlobalVariableDIE(GV);
for (auto *SP : CUNode->getSubprograms())
@@ -467,7 +489,10 @@ void DwarfDebug::beginModule() {
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
+ DIType *RT = cast<DIType>(resolve(Ty->getRef()));
+ if (!RT->isExternalTypeRef())
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
@@ -1061,12 +1086,8 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc()) {
- // Did the target forget to set the FrameSetup flag for CFI insns?
- assert(!MI.isCFIInstruction() &&
- "First non-frame-setup instruction is a CFI instruction.");
+ MI.getDebugLoc())
return MI.getDebugLoc();
- }
return DebugLoc();
}
@@ -1079,8 +1100,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return;
- auto DI = FunctionDIs.find(MF->getFunction());
- if (DI == FunctionDIs.end())
+ auto DI = MF->getFunction()->getSubprogram();
+ if (!DI)
return;
// Grab the lexical scopes for the function, if we don't have any of those
@@ -1127,7 +1148,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
- if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+ if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
@@ -1171,7 +1192,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
"endFunction should be called with the same function as beginFunction");
if (!MMI->hasDebugInfo() || LScopes.empty() ||
- !FunctionDIs.count(MF->getFunction())) {
+ !MF->getFunction()->getSubprogram()) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
@@ -1863,7 +1884,7 @@ void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLineDWOSection());
- SplitTypeUnitFileTable.Emit(*Asm->OutStreamer);
+ SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -1884,7 +1905,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
return &SplitTypeUnitFileTable;
}
-static uint64_t makeTypeSignature(StringRef Identifier) {
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 01f34c6..4c613a9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -33,6 +33,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
#include <memory>
namespace llvm {
@@ -49,24 +50,6 @@ class DwarfUnit;
class MachineModuleInfo;
//===----------------------------------------------------------------------===//
-/// This class is used to record source line correspondence.
-class SrcLineInfo {
- unsigned Line; // Source line number.
- unsigned Column; // Source column.
- unsigned SourceID; // Source ID number.
- MCSymbol *Label; // Label in code ID number.
-public:
- SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
- : Line(L), Column(C), SourceID(S), Label(label) {}
-
- // Accessors
- unsigned getLine() const { return Line; }
- unsigned getColumn() const { return Column; }
- unsigned getSourceID() const { return SourceID; }
- MCSymbol *getLabel() const { return Label; }
-};
-
-//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
@@ -127,14 +110,14 @@ public:
// Accessors.
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
- const ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+ ArrayRef<const DIExpression *> getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+ ArrayRef<int> getFrameIndex() const { return FrameIndex; }
void addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
@@ -156,7 +139,8 @@ public:
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
- if (Var->getTag() == dwarf::DW_TAG_arg_variable)
+ // FIXME: Why don't we just infer this tag and store it all along?
+ if (Var->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
@@ -282,11 +266,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Holders for the various debug information flags that we might need to
/// have exposed. See accessor functions below for description.
- /// Holder for imported entities.
- typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
- ImportedEntityMap;
- ImportedEntityMap ScopesWithImportedEntities;
-
/// Map from MDNodes for user-defined types to the type units that
/// describe them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
@@ -298,16 +277,12 @@ class DwarfDebug : public AsmPrinterHandler {
/// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
- /// Whether or not to use AT_ranges for compilation units.
- bool HasCURanges;
-
- /// Whether we emitted a function into a section other than the
- /// default text.
- bool UsedNonDefaultText;
-
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
+ /// Whether to emit DW_AT_[MIPS_]linkage_name.
+ bool UseLinkageNames;
+
/// Version of dwarf we're emitting.
unsigned DwarfVersion;
@@ -338,7 +313,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
- bool IsPS4;
AddressPool AddrPool;
@@ -347,7 +321,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfAccelTable AccelNamespace;
DwarfAccelTable AccelTypes;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
+ // Identify a debugger for "tuning" the debug info.
+ DebuggerKind DebuggerTuning;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
@@ -372,12 +347,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// Compute the size and offset of a DIE given an incoming Offset.
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
-
- /// Compute the size and offset of all the DIEs.
- void computeSizeAndOffsets();
-
/// Collect info for variables that were optimized out.
void collectDeadVariables();
@@ -443,9 +412,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Emit visible names into a debug ranges section.
void emitDebugRanges();
- /// Emit inline info using custom format.
- void emitDebugInlineInfo();
-
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
@@ -456,10 +422,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// section.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
- /// Construct the split debug info compile unit for the debug info
- /// section.
- DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
-
/// Emit the debug info dwo section.
void emitDebugInfoDWO();
@@ -544,6 +506,9 @@ public:
/// Process end of an instruction.
void endInstruction() override;
+ /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
+ static uint64_t makeTypeSignature(StringRef Identifier);
+
/// Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
@@ -558,10 +523,22 @@ public:
SymSize[Sym] = Size;
}
+ /// Returns whether to emit DW_AT_[MIPS_]linkage_name.
+ bool useLinkageNames() const { return UseLinkageNames; }
+
/// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
/// standard DW_OP_form_tls_address opcode
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ /// @}
+
// Experimental DWARF5 features.
/// Returns whether or not to emit tables that dwarf consumers can
@@ -604,9 +581,6 @@ public:
DwarfCompileUnit *lookupUnit(const DIE *CU) const {
return CUDieMap.lookup(CU);
}
- /// isSubprogramContext - Return true if Context is either a subprogram
- /// or another context nested inside a subprogram.
- bool isSubprogramContext(const MDNode *Context);
void addSubprogramNames(const DISubprogram *SP, DIE &Die);
@@ -622,14 +596,6 @@ public:
const MachineFunction *getCurrentFunction() const { return CurFn; }
- iterator_range<ImportedEntityMap::const_iterator>
- findImportedEntitiesForScope(const MDNode *Scope) const {
- return make_range(std::equal_range(
- ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
- std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
- less_first()));
- }
-
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a2799b8..7b5b831 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -211,12 +211,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
return AddMachineRegPiece(MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
- case dwarf::DW_OP_plus: {
- // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+ case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus: {
+ // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
+ // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
auto N = I.getNext();
if (N != E && N->getOp() == dwarf::DW_OP_deref) {
unsigned Offset = I->getArg(0);
- ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+ ValidReg = AddMachineRegIndirect(
+ MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
std::advance(I, 2);
break;
} else
@@ -255,6 +258,12 @@ void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
EmitOp(dwarf::DW_OP_plus_uconst);
EmitUnsigned(I->getArg(0));
break;
+ case dwarf::DW_OP_minus:
+ // There is no OP_minus_uconst.
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(I->getArg(0));
+ EmitOp(dwarf::DW_OP_minus);
+ break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
break;
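A small trace of the two new paths, assuming a variable described by the DIExpression {DW_OP_minus, 8, DW_OP_deref} on top of a machine register:
  //   AddMachineRegExpression: the minus-then-deref pair collapses into an
  //   indirect register location, i.e. DW_OP_breg<Reg> -8.
  //   AddExpression (no trailing deref): DW_OP_constu 8, DW_OP_minus.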
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 3555822..d75fea5 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -192,18 +192,19 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
DIEInteger(1));
}
-void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
}
-void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+ uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
-void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
@@ -222,9 +223,10 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
DIEString(DU->getStringPool().getEntry(*Asm, String)));
}
-DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute,
- dwarf::Form Form,
- const MCSymbol *Label) {
+DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label) {
return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
}
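Widening these helpers to DIEValueList lets the same code populate DIELoc/DIEBlock values as well as full DIEs. A minimal sketch from inside a DwarfUnit member, assuming a DIE named Die is in scope along with the usual allocator and addBlock helper:
  // Sketch: build a trivial one-byte location expression and attach it.
  DIELoc *Loc = new (DIEValueAllocator) DIELoc;
  addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_nop); // DIELoc is a DIEValueList
  addBlock(Die, dwarf::DW_AT_location, Loc);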
@@ -277,6 +279,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
}
+void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier) {
+ uint64_t Signature = DD->makeTypeSignature(Identifier);
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
+ DIEInteger(Signature));
+}
+
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIE *DieCU = Die.getUnitOrNull();
@@ -292,8 +301,6 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
}
DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
- assert(Tag != dwarf::DW_TAG_auto_variable &&
- Tag != dwarf::DW_TAG_arg_variable);
DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
if (N)
insertDIE(N, &Die);
@@ -445,7 +452,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements();
+ DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
const DIDerivedType *varField = nullptr;
const DIDerivedType *forwardingField = nullptr;
@@ -506,34 +513,35 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
- if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+ return false;
+
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
dwarf::Tag T = (dwarf::Tag)Ty->getTag();
// Encode pointer constants as unsigned bytes. This is used at least for
// null pointer constant emission.
- // (Pieces of) aggregate types that get hacked apart by SROA may also be
- // represented by a constant. Encode them as unsigned bytes.
// FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
// here, but accept them for now due to a bug in SROA producing bogus
// dbg.values.
- if (T == dwarf::DW_TAG_array_type ||
- T == dwarf::DW_TAG_class_type ||
- T == dwarf::DW_TAG_pointer_type ||
+ if (T == dwarf::DW_TAG_pointer_type ||
T == dwarf::DW_TAG_ptr_to_member_type ||
T == dwarf::DW_TAG_reference_type ||
- T == dwarf::DW_TAG_rvalue_reference_type ||
- T == dwarf::DW_TAG_structure_type ||
- T == dwarf::DW_TAG_union_type)
+ T == dwarf::DW_TAG_rvalue_reference_type)
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type ||
- T == dwarf::DW_TAG_enumeration_type);
- if (DITypeRef Deriv = DTy->getBaseType())
- return isUnsignedDIType(DD, DD->resolve(Deriv));
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
- return false;
+ T == dwarf::DW_TAG_restrict_type);
+ DITypeRef Deriv = DTy->getBaseType();
+ assert(Deriv && "Expected valid base type");
+ return isUnsignedDIType(DD, DD->resolve(Deriv));
}
auto *BTy = cast<DIBasicType>(Ty);
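(Illustration, not part of the patch: a self-contained sketch of the revised signedness decision, using stand-in tags rather than the LLVM DIType classes.)

  #include <cassert>
  #include <cstdio>

  enum class Tag { Enum, Aggregate, Pointer, Reference, Typedef, Const, Basic };

  struct Type {
    Tag T;
    const Type *Base = nullptr;  // for typedef/const-style wrappers
    bool BasicUnsigned = false;  // for basic types, taken from the encoding
  };

  static bool isUnsignedSketch(const Type &Ty) {
    switch (Ty.T) {
    case Tag::Enum:      return false;  // unknown underlying type
    case Tag::Aggregate: return true;   // SROA'd pieces emitted as unsigned bytes
    case Tag::Pointer:
    case Tag::Reference: return true;   // pointer constants emitted as unsigned
    case Tag::Typedef:
    case Tag::Const:
      assert(Ty.Base && "expected valid base type");
      return isUnsignedSketch(*Ty.Base);
    case Tag::Basic:     return Ty.BasicUnsigned;
    }
    return false;
  }

  int main() {
    Type U32{Tag::Basic, nullptr, true};
    Type Alias{Tag::Typedef, &U32};
    std::printf("%d %d\n", isUnsignedSketch(Alias), isUnsignedSketch({Tag::Enum}));
    return 0;
  }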
@@ -659,7 +667,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
}
void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
- if (!LinkageName.empty())
+ if (!LinkageName.empty() && DD->useLinkageNames())
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
@@ -685,6 +693,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getOrCreateNameSpace(NS);
if (auto *SP = dyn_cast<DISubprogram>(Context))
return getOrCreateSubprogramDIE(SP);
+ if (auto *M = dyn_cast<DIModule>(Context))
+ return getOrCreateModule(M);
return getDIE(Context);
}
@@ -700,7 +710,8 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
- updateAcceleratorTables(Context, Ty, TyDIE);
+ if (!Ty->isExternalTypeRef())
+ updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
@@ -753,7 +764,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = 0;
- if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) {
+ if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
@@ -795,8 +806,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
- const DIScope *Ctx = *I;
+ for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
@@ -843,7 +853,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
- && Tag != dwarf::DW_TAG_ptr_to_member_type)
+ && Tag != dwarf::DW_TAG_ptr_to_member_type
+ && Tag != dwarf::DW_TAG_reference_type
+ && Tag != dwarf::DW_TAG_rvalue_reference_type)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
@@ -899,6 +911,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isExternalTypeRef()) {
+ StringRef Identifier = CTy->getIdentifier();
+ assert(!Identifier.empty() && "external type ref without identifier");
+ addFlag(Buffer, dwarf::DW_AT_declaration);
+ return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
+ }
+
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -1134,6 +1153,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
"definition DIE was created in "
"getOrCreateSubprogramDIE");
DeclLinkageName = SPDecl->getLinkageName();
+ unsigned DeclID =
+ getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
+ unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+ if (DeclID != DefID)
+ addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+ if (SP->getLine() != SPDecl->getLine())
+ addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
}
// Add function template parameters.
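(Illustration, not part of the patch: a hypothetical two-file example of when the newly added DW_AT_decl_file/DW_AT_decl_line on the definition differ from the declaration's; file names and line numbers are made up.)

  // s.h -- the declaration supplies the default decl_file/decl_line
  struct S {
    void f();      // SPDecl: file s.h, line 3
  };

  // s.cpp -- the out-of-line definition now also gets its own attributes
  #include "s.h"
  void S::f() {}   // SP: DW_AT_decl_file = s.cpp, DW_AT_decl_line = 8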
@@ -1180,11 +1207,10 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- const DISubroutineType *SPTy = SP->getType();
- assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
- "the type of a subprogram should be a subroutine");
+ DITypeRefArray Args;
+ if (const DISubroutineType *SPTy = SP->getType())
+ Args = SPTy->getTypeArray();
- auto Args = SPTy->getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 44d9d22..82760bf 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -113,13 +113,6 @@ protected:
DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- /// Add a string attribute data and value.
- ///
- /// This is guaranteed to be in the local string pool instead of indirected.
- void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
-
- void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
-
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
public:
@@ -162,9 +155,6 @@ public:
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {}
- /// Add a new name to the namespace accelerator table.
- void addAccelNamespace(StringRef Name, const DIE &Die);
-
/// Returns the DIE map slot for the specified debug variable.
///
/// We delegate the request to DwarfDebug when the MDNode can be part of the
@@ -186,14 +176,14 @@ public:
void addFlag(DIE &Die, dwarf::Attribute Attribute);
/// Add an unsigned integer attribute data and value.
- void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
- uint64_t Integer);
+ void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, uint64_t Integer);
- void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
+ void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
/// Add an signed integer attribute data and value.
- void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
- int64_t Integer);
+ void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, int64_t Integer);
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
@@ -206,8 +196,10 @@ public:
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
/// Add a Dwarf label attribute data and value.
- DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
- dwarf::Form Form, const MCSymbol *Label);
+ DIEValueList::value_iterator addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label);
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
@@ -228,7 +220,11 @@ public:
/// Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
+ /// Add a type's DW_AT_signature and set the declaration flag.
void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
+ /// Add an attribute containing the type signature for a unique identifier.
+ void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 49ef8d3..e24dcb1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -662,9 +662,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalValue *>::const_reverse_iterator
- I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalValue *GV = *I;
+ for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
+ TypeInfos.rend())) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index e42e082..c6a0e9d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -76,10 +76,6 @@ protected:
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions);
- /// Return `true' if this is a call to a function marked `nounwind'. Return
- /// `false' otherwise.
- bool callToNoUnwindFunction(const MachineInstr *MI);
-
void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
RangeMapType &PadMap);
@@ -131,6 +127,10 @@ public:
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
void beginInstruction(const MachineInstr *MI) override {}
void endInstruction() override {}
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ static bool callToNoUnwindFunction(const MachineInstr *MI);
};
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index eb9e4c1..6a023b9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -48,7 +48,7 @@ void llvm::linkErlangGCPrinter() {}
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
// Put this in a custom .note section.
OS.SwitchSection(
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 2ceec61..c09ef6a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -93,7 +93,7 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
///
void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_end");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6610ac7..c2c0f84 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -27,15 +27,15 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
auto *Scope = cast<DIScope>(S);
StringRef Dir = Scope->getDirectory(),
Filename = Scope->getFilename();
- char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
- if (Result)
- return Result;
+ std::string &Filepath =
+ DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
+ if (!Filepath.empty())
+ return Filepath;
// Clang emits directory and relative filename info into the IR, but CodeView
// operates on full paths. We could change Clang to emit full paths too, but
// that would increase the IR size and probably not needed for other users.
// For now, just concatenate and canonicalize the path here.
- std::string Filepath;
if (Filename.find(':') == 1)
Filepath = Filename;
else
@@ -74,8 +74,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
Filepath.erase(Cursor, 1);
- Result = strdup(Filepath.c_str());
- return StringRef(Result);
+ return Filepath;
}
void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
@@ -253,7 +252,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
}
FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
- // Emit a line table subsection, requred to do PC-to-file:line lookup.
+ // Emit a line table subsection, required to do PC-to-file:line lookup.
Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 43d1a43..78068e0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
}
} FileNameRegistry;
- typedef std::map<std::pair<StringRef, StringRef>, char *>
+ typedef std::map<std::pair<StringRef, StringRef>, std::string>
DirAndFilenameToFilepathMapTy;
DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap;
StringRef getFullFilepath(const MDNode *S);
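(Illustration, not part of the patch: storing std::string values in the cache lets the map own the path storage, which is why the strdup()/free() bookkeeping and the custom destructor below can go away. The keys and the cached value here are hypothetical stand-ins.)

  #include <cstdio>
  #include <map>
  #include <string>
  #include <utility>

  int main() {
    std::map<std::pair<std::string, std::string>, std::string> PathCache;

    // operator[] default-constructs an empty string on first use; fill it once.
    std::string &Path = PathCache[{"/src", "a.cpp"}];
    if (Path.empty())
      Path = "/src/a.cpp";

    std::printf("%s\n", Path.c_str());
    return 0;  // the map's destructor releases all cached strings
  }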
@@ -116,14 +116,6 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
public:
WinCodeViewLineTables(AsmPrinter *Asm);
- ~WinCodeViewLineTables() override {
- for (DirAndFilenameToFilepathMapTy::iterator
- I = DirAndFilenameToFilepathMap.begin(),
- E = DirAndFilenameToFilepathMap.end();
- I != E; ++I)
- free(I->second);
- }
-
void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
/// \brief Emit the COFF section that holds the line table information.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index a2b9316..48b7104 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -37,6 +38,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -62,9 +64,9 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MMI->getLandingPads().empty();
+ bool hasEHFunclets = MMI->hasEHFunclets();
const Function *F = MF->getFunction();
- const Function *ParentF = MMI->getWinEHParent(F);
shouldEmitMoves = Asm->needsSEHMoves();
@@ -78,49 +80,23 @@ void WinException::beginFunction(const MachineFunction *MF) {
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
- shouldEmitPersonality = forceEmitPersonality || (hasLandingPads &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per);
+ shouldEmitPersonality =
+ forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per);
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- // If we're not using CFI, we don't want the CFI or the personality. If
- // WinEHPrepare outlined something, we should emit the LSDA.
+ // If we're not using CFI, we don't want the CFI or the personality, but we
+ // might want EH tables if we had EH pads.
if (!Asm->MAI->usesWindowsCFI()) {
- bool HasOutlinedChildren =
- F->hasFnAttribute("wineh-parent") && F == ParentF;
- shouldEmitLSDA = HasOutlinedChildren;
+ shouldEmitLSDA = hasEHFunclets;
shouldEmitPersonality = false;
return;
}
- // If this was an outlined handler, we need to define the label corresponding
- // to the offset of the parent frame relative to the stack pointer after the
- // prologue.
- if (F != ParentF) {
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
- auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F);
- if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) {
- MCSymbol *HandlerTypeParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(F->getName()));
-
- // Emit a symbol assignment.
- Asm->OutStreamer->EmitAssignment(
- HandlerTypeParentFrameOffset,
- MCConstantExpr::create(I->second, Asm->OutContext));
- }
- }
-
- if (shouldEmitMoves || shouldEmitPersonality)
- Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym);
-
- if (shouldEmitPersonality) {
- const MCSymbol *PersHandlerSym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
- Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
- }
+ beginFunclet(MF->front(), Asm->CurrentFnSym);
}
/// endFunction - Gather and emit post-function exception information.
@@ -134,43 +110,158 @@ void WinException::endFunction(const MachineFunction *MF) {
if (F->hasPersonalityFn())
Per = classifyEHPersonality(F->getPersonalityFn());
- // Get rid of any dead landing pads if we're not using a Windows EH scheme. In
- // Windows EH schemes, the landing pad is not actually reachable. It only
- // exists so that we can emit the right table data.
- if (!isMSVCEHPersonality(Per))
+ // Get rid of any dead landing pads if we're not using funclets. In funclet
+ // schemes, the landing pad is not actually reachable. It only exists so
+ // that we can emit the right table data.
+ if (!isFuncletEHPersonality(Per))
MMI->TidyLandingPads();
+ endFunclet();
+
+ // endFunclet will emit the necessary .xdata tables for x64 SEH.
+ if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+ return;
+
if (shouldEmitPersonality || shouldEmitLSDA) {
Asm->OutStreamer->PushSection();
- if (shouldEmitMoves || shouldEmitPersonality) {
- // Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
- } else {
- // Just switch sections to the right xdata section. This use of
- // CurrentFnSym assumes that we only emit the LSDA when ending the parent
- // function.
- MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
- Asm->CurrentFnSym, Asm->OutContext);
- Asm->OutStreamer->SwitchSection(XData);
- }
+ // Just switch sections to the right xdata section. This use of CurrentFnSym
+ // assumes that we only emit the LSDA when ending the parent function.
+ MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym,
+ Asm->OutContext);
+ Asm->OutStreamer->SwitchSection(XData);
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
if (Per == EHPersonality::MSVC_Win64SEH)
- emitCSpecificHandlerTable();
+ emitCSpecificHandlerTable(MF);
else if (Per == EHPersonality::MSVC_X86SEH)
emitExceptHandlerTable(MF);
else if (Per == EHPersonality::MSVC_CXX)
emitCXXFrameHandler3Table(MF);
+ else if (Per == EHPersonality::CoreCLR)
+ emitCLRExceptionTable(MF);
else
emitExceptionTable();
Asm->OutStreamer->PopSection();
}
+}
+
+/// Retrieve the MCSymbol for a MachineBasicBlock that is a funclet entry.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ if (!MBB)
+ return nullptr;
+ assert(MBB->isEHFuncletEntry());
+
+ // Give catches and cleanups a name based off of their parent function and
+ // their funclet entry block's number.
+ const MachineFunction *MF = MBB->getParent();
+ const Function *F = MF->getFunction();
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCContext &Ctx = MF->getContext();
+ StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+ return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+ Twine(MBB->getNumber()) + "@?0?" +
+ FuncLinkageName + "@4HA");
+}
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym) {
+ CurrentFuncletEntry = &MBB;
+
+ const Function *F = Asm->MF->getFunction();
+ // If a symbol was not provided for the funclet, invent one.
+ if (!Sym) {
+ Sym = getMCSymbolForMBB(Asm, &MBB);
+
+ // Describe our funclet symbol as a function with internal linkage.
+ Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ Asm->OutStreamer->EndCOFFSymbolDef();
+
+ // We want our funclet's entry point to be aligned such that no nops will be
+ // present after the label.
+ Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+ F);
+
+ // Now that we've emitted the alignment directive, point at our funclet.
+ Asm->OutStreamer->EmitLabel(Sym);
+ }
+
+ // Mark 'Sym' as starting our funclet.
if (shouldEmitMoves || shouldEmitPersonality)
+ Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *PerFn = nullptr;
+
+ // Determine which personality routine we are using for this funclet.
+ if (F->hasPersonalityFn())
+ PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
+
+ // Classify the personality routine so that we may reason about it.
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
+ if (Per != EHPersonality::MSVC_CXX ||
+ !CurrentFuncletEntry->isCleanupFuncletEntry())
+ Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ }
+}
+
+void WinException::endFunclet() {
+ // No funclet to process? Great, we have nothing to do.
+ if (!CurrentFuncletEntry)
+ return;
+
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ const Function *F = Asm->MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // The .seh_handlerdata directive implicitly switches section, push the
+ // current section so that we may return to it.
+ Asm->OutStreamer->PushSection();
+
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
+ if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
+ !CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // If this is a C++ catch funclet (or the parent function),
+ // emit a reference to the LSDA for the parent function.
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$cppxdata$", FuncLinkageName));
+ Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
+ } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() &&
+ !CurrentFuncletEntry->isEHFuncletEntry()) {
+ // If this is the parent function in Win64 SEH, emit the LSDA immediately
+ // following .seh_handlerdata.
+ emitCSpecificHandlerTable(Asm->MF);
+ }
+
+ // Switch back to the previous section now that we are done writing to
+ // .xdata.
+ Asm->OutStreamer->PopSection();
+
+ // Emit a .seh_endproc directive to mark the end of the function.
Asm->OutStreamer->EmitWinCFIEndProc();
+ }
+
+ // Let's make sure we don't try to end the same funclet twice.
+ CurrentFuncletEntry = nullptr;
}
const MCExpr *WinException::create32bitRef(const MCSymbol *Value) {
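(Illustration, not part of the patch: a standalone sketch of the funclet symbol naming scheme used by getMCSymbolForMBB above, for a hypothetical parent function "foo" whose catch funclet entry is basic block #3.)

  #include <cstdio>
  #include <string>

  int main() {
    std::string FuncLinkageName = "foo";  // hypothetical linkage name
    unsigned BlockNumber = 3;             // hypothetical funclet entry block
    bool IsCleanup = false;               // catch funclet, not a cleanup

    std::string Prefix = IsCleanup ? "dtor" : "catch";
    std::string Sym = "?" + Prefix + "$" + std::to_string(BlockNumber) +
                      "@?0?" + FuncLinkageName + "@4HA";
    std::printf("%s\n", Sym.c_str());     // prints: ?catch$3@?0?foo@4HA
    return 0;
  }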
@@ -188,6 +279,202 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
return create32bitRef(Asm->getSymbol(GV));
}
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(create32bitRef(Label),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OffsetOf, Asm->OutContext),
+ MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+int WinException::getFrameIndexOffset(int FrameIndex,
+ const WinEHFuncInfo &FuncInfo) {
+ const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
+ unsigned UnusedReg;
+ if (Asm->MAI->usesWindowsCFI())
+ return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg);
+ // For 32-bit, offsets should be relative to the end of the EH registration
+ // node. For 64-bit, it's relative to SP at the end of the prologue.
+ assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
+ int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += FuncInfo.EHRegNodeEndOffset;
+ return Offset;
+}
+
+namespace {
+
+/// Top-level state used to represent unwind to caller
+const int NullState = -1;
+
+struct InvokeStateChange {
+ /// EH Label immediately after the last invoke in the previous state, or
+ /// nullptr if the previous state was the null state.
+ const MCSymbol *PreviousEndLabel;
+
+ /// EH label immediately before the first invoke in the new state, or nullptr
+ /// if the new state is the null state.
+ const MCSymbol *NewStartLabel;
+
+ /// State of the invoke following NewStartLabel, or NullState to indicate
+ /// the presence of calls which may unwind to caller.
+ int NewState;
+};
+
+/// Iterator that reports all the invoke state changes in a range of machine
+/// basic blocks. Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+ InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+ MachineFunction::const_iterator MFI,
+ MachineFunction::const_iterator MFE,
+ MachineBasicBlock::const_iterator MBBI,
+ int BaseState)
+ : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+ LastStateChange.PreviousEndLabel = nullptr;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ scan();
+ }
+
+public:
+ static iterator_range<InvokeStateChangeIterator>
+ range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+ MachineFunction::const_iterator End, int BaseState = NullState) {
+ // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+ // the end of the last block.
+ assert(Begin != End);
+ auto BlockBegin = Begin->begin();
+ auto BlockEnd = std::prev(End)->end();
+ return make_range(
+ InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+ InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+ }
+
+ // Iterator methods.
+ bool operator==(const InvokeStateChangeIterator &O) const {
+ assert(BaseState == O.BaseState);
+ // Must be visiting same block.
+ if (MFI != O.MFI)
+ return false;
+ // Must be visiting same instr.
+ if (MBBI != O.MBBI)
+ return false;
+ // At end of block/instr iteration, we can still have two distinct states:
+ // one to report the final EndLabel, and another indicating the end of the
+ // state change iteration. Check for CurrentEndLabel equality to
+ // distinguish these.
+ return CurrentEndLabel == O.CurrentEndLabel;
+ }
+
+ bool operator!=(const InvokeStateChangeIterator &O) const {
+ return !operator==(O);
+ }
+ InvokeStateChange &operator*() { return LastStateChange; }
+ InvokeStateChange *operator->() { return &LastStateChange; }
+ InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+ InvokeStateChangeIterator &scan();
+
+ const WinEHFuncInfo &EHInfo;
+ const MCSymbol *CurrentEndLabel = nullptr;
+ MachineFunction::const_iterator MFI;
+ MachineFunction::const_iterator MFE;
+ MachineBasicBlock::const_iterator MBBI;
+ InvokeStateChange LastStateChange;
+ bool VisitingInvoke = false;
+ int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+ bool IsNewBlock = false;
+ for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+ if (IsNewBlock)
+ MBBI = MFI->begin();
+ for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+ MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+ // Indicate a change of state to the null state. We don't have
+ // start/end EH labels handy but the caller won't expect them for
+ // null state regions.
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ CurrentEndLabel = nullptr;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+
+ // All other state changes are at EH labels before/after invokes.
+ if (!MI.isEHLabel())
+ continue;
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+ if (Label == CurrentEndLabel) {
+ VisitingInvoke = false;
+ continue;
+ }
+ auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+ // Ignore EH labels that aren't the ones inserted before an invoke
+ if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+ continue;
+ auto &StateAndEnd = InvokeMapIter->second;
+ int NewState = StateAndEnd.first;
+ // Keep track of the fact that we're between EH start/end labels so
+ // we know not to treat the invoke we'll see as unwinding to caller.
+ VisitingInvoke = true;
+ if (NewState == LastStateChange.NewState) {
+ // The state isn't actually changing here. Record the new end and
+ // keep going.
+ CurrentEndLabel = StateAndEnd.second;
+ continue;
+ }
+ // Found a state change to report
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = Label;
+ LastStateChange.NewState = NewState;
+ // Start keeping track of the new current end
+ CurrentEndLabel = StateAndEnd.second;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+ }
+ // Iteration hit the end of the block range.
+ if (LastStateChange.NewState != BaseState) {
+ // Report the end of the last new state
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ // Leave CurrentEndLabel non-null to distinguish this state from end.
+ assert(CurrentEndLabel != nullptr);
+ return *this;
+ }
+ // We've reported all state changes and hit the end state.
+ CurrentEndLabel = nullptr;
+ return *this;
+}
+
/// Emit the language-specific data that __C_specific_handler expects. This
/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
/// up after faults with __try, __except, and __finally. The typeinfo values
@@ -216,135 +503,156 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
/// };
-void WinException::emitCSpecificHandlerTable() {
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
-
- // Simplifying assumptions for first implementation:
- // - Cleanups are not implemented.
- // - Filters are not implemented.
-
- // The Itanium LSDA table sorts similar landing pads together to simplify the
- // actions table, but we don't need that.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- LandingPads.reserve(PadInfos.size());
- for (const auto &LP : PadInfos)
- LandingPads.push_back(&LP);
-
- // Compute label ranges for call sites as we would for the Itanium LSDA, but
- // use an all zero action table because we aren't using these actions.
- SmallVector<unsigned, 64> FirstActions;
- FirstActions.resize(LandingPads.size());
- SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, LandingPads, FirstActions);
-
- MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
- MCSymbol *EHFuncEndSym = Asm->getFunctionEnd();
-
- // Emit the number of table entries.
- unsigned NumEntries = 0;
- for (const CallSiteEntry &CSE : CallSites) {
- if (!CSE.LPad)
- continue; // Ignore gaps.
- NumEntries += CSE.LPad->SEHHandlers.size();
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.x86.seh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+
+ // Use the assembler to compute the number of table entries through label
+ // difference and division.
+ MCSymbol *TableBegin =
+ Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true);
+ MCSymbol *TableEnd =
+ Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true);
+ const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin);
+ const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx);
+ const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx);
+ AddComment("Number of call sites");
+ OS.EmitValue(EntryCount, 4);
+
+ OS.EmitLabel(TableBegin);
+
+ // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only
+ // models exceptions from invokes. LLVM also allows arbitrary reordering of
+ // the code, so our tables end up looking a bit different. Rather than
+ // trying to match MSVC's tables exactly, we emit a denormalized table. For
+ // each range of invokes in the same state, we emit table entries for all
+ // the actions that would be taken in that state. This means our tables are
+ // slightly bigger, which is OK.
+ const MCSymbol *LastStartLabel = nullptr;
+ int LastEHState = -1;
+ // Break out before we enter into a finally funclet.
+ // FIXME: We need to emit separate EH tables for cleanups.
+ MachineFunction::const_iterator End = MF->end();
+ MachineFunction::const_iterator Stop = std::next(MF->begin());
+ while (Stop != End && !Stop->isEHFuncletEntry())
+ ++Stop;
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) {
+ // Emit all the actions for the state we just transitioned out of
+ // if it was not the null state
+ if (LastEHState != -1)
+ emitSEHActionsForRange(FuncInfo, LastStartLabel,
+ StateChange.PreviousEndLabel, LastEHState);
+ LastStartLabel = StateChange.NewStartLabel;
+ LastEHState = StateChange.NewState;
}
- Asm->OutStreamer->EmitIntValue(NumEntries, 4);
- // If there are no actions, we don't need to iterate again.
- if (NumEntries == 0)
- return;
+ OS.EmitLabel(TableEnd);
+}
- // Emit the four-label records for each call site entry. The table has to be
- // sorted in layout order, and the call sites should already be sorted.
- for (const CallSiteEntry &CSE : CallSites) {
- // Ignore gaps. Unlike the Itanium model, unwinding through a frame without
- // an EH table entry will propagate the exception rather than terminating
- // the program.
- if (!CSE.LPad)
- continue;
- const LandingPadInfo *LPad = CSE.LPad;
-
- // Compute the label range. We may reuse the function begin and end labels
- // rather than forming new ones.
- const MCExpr *Begin =
- create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
- const MCExpr *End;
- if (CSE.EndLabel) {
- // The interval is half-open, so we have to add one to include the return
- // address of the last invoke in the range.
- End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel),
- MCConstantExpr::create(1, Asm->OutContext),
- Asm->OutContext);
+void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ assert(BeginLabel && EndLabel);
+ while (State != -1) {
+ const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
+ const MCExpr *FilterOrFinally;
+ const MCExpr *ExceptOrNull;
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ if (UME.IsFinally) {
+ FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
+ ExceptOrNull = MCConstantExpr::create(0, Ctx);
} else {
- End = create32bitRef(EHFuncEndSym);
+ // For an except, the filter can be 1 (catch-all) or a function
+ // label.
+ FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter)
+ : MCConstantExpr::create(1, Ctx);
+ ExceptOrNull = create32bitRef(Handler->getSymbol());
}
- // Emit an entry for each action.
- for (SEHHandler Handler : LPad->SEHHandlers) {
- Asm->OutStreamer->EmitValue(Begin, 4);
- Asm->OutStreamer->EmitValue(End, 4);
-
- // Emit the filter or finally function pointer, if present. Otherwise,
- // emit '1' to indicate a catch-all.
- const Function *F = Handler.FilterOrFinally;
- if (F)
- Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4);
- else
- Asm->OutStreamer->EmitIntValue(1, 4);
-
- // Emit the recovery address, if present. Otherwise, this must be a
- // finally.
- const BlockAddress *BA = Handler.RecoverBA;
- if (BA)
- Asm->OutStreamer->EmitValue(
- create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4);
- else
- Asm->OutStreamer->EmitIntValue(0, 4);
- }
+ AddComment("LabelStart");
+ OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ AddComment("LabelEnd");
+ OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
+ : "CatchAll");
+ OS.EmitValue(FilterOrFinally, 4);
+ AddComment(UME.IsFinally ? "Null" : "ExceptionHandler");
+ OS.EmitValue(ExceptOrNull, 4);
+
+ assert(UME.ToState < State && "states should decrease");
+ State = UME.ToState;
}
}
void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
const Function *F = MF->getFunction();
- const Function *ParentF = MMI->getWinEHParent(F);
auto &OS = *Asm->OutStreamer;
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
- StringRef ParentLinkageName =
- GlobalValue::getRealLinkageName(ParentF->getName());
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
MCSymbol *FuncInfoXData = nullptr;
if (shouldEmitPersonality) {
- FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$cppxdata$", ParentLinkageName));
- OS.EmitValue(create32bitRef(FuncInfoXData), 4);
-
- extendIP2StateTable(MF, ParentF, FuncInfo);
-
- // Defer emission until we've visited the parent function and all the catch
- // handlers. Cleanups don't contribute to the ip2state table, so don't count
- // them.
- if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
- return;
- ++FuncInfo.NumIPToStateFuncsVisited;
- if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
- return;
+ // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from
+ // IPs to state numbers.
+ FuncInfoXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName));
+ computeIP2StateTable(MF, FuncInfo, IPToStateTable);
} else {
- FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName);
- emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName);
+ FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName);
}
+ int UnwindHelpOffset = 0;
+ if (Asm->MAI->usesWindowsCFI())
+ UnwindHelpOffset =
+ getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
+
MCSymbol *UnwindMapXData = nullptr;
MCSymbol *TryBlockMapXData = nullptr;
MCSymbol *IPToStateXData = nullptr;
- if (!FuncInfo.UnwindMap.empty())
+ if (!FuncInfo.CxxUnwindMap.empty())
UnwindMapXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$stateUnwindMap$", ParentLinkageName));
+ Twine("$stateUnwindMap$", FuncLinkageName));
if (!FuncInfo.TryBlockMap.empty())
- TryBlockMapXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$tryMap$", ParentLinkageName));
- if (!FuncInfo.IPToStateList.empty())
- IPToStateXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$ip2state$", ParentLinkageName));
+ TryBlockMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName));
+ if (!IPToStateTable.empty())
+ IPToStateXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName));
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
// FuncInfo {
// uint32_t MagicNumber
@@ -363,17 +671,38 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
OS.EmitValueToAlignment(4);
OS.EmitLabel(FuncInfoXData);
- OS.EmitIntValue(0x19930522, 4); // MagicNumber
- OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
- OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap
- OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks
- OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap
- OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries
- OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap
- if (Asm->MAI->usesWindowsCFI())
- OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
- OS.EmitIntValue(0, 4); // ESTypeList
- OS.EmitIntValue(1, 4); // EHFlags
+
+ AddComment("MagicNumber");
+ OS.EmitIntValue(0x19930522, 4);
+
+ AddComment("MaxState");
+ OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4);
+
+ AddComment("UnwindMap");
+ OS.EmitValue(create32bitRef(UnwindMapXData), 4);
+
+ AddComment("NumTryBlocks");
+ OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4);
+
+ AddComment("TryBlockMap");
+ OS.EmitValue(create32bitRef(TryBlockMapXData), 4);
+
+ AddComment("IPMapEntries");
+ OS.EmitIntValue(IPToStateTable.size(), 4);
+
+ AddComment("IPToStateXData");
+ OS.EmitValue(create32bitRef(IPToStateXData), 4);
+
+ if (Asm->MAI->usesWindowsCFI()) {
+ AddComment("UnwindHelp");
+ OS.EmitIntValue(UnwindHelpOffset, 4);
+ }
+
+ AddComment("ESTypeList");
+ OS.EmitIntValue(0, 4);
+
+ AddComment("EHFlags");
+ OS.EmitIntValue(1, 4);
// UnwindMapEntry {
// int32_t ToState;
@@ -381,9 +710,14 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// };
if (UnwindMapXData) {
OS.EmitLabel(UnwindMapXData);
- for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) {
- OS.EmitIntValue(UME.ToState, 4); // ToState
- OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action
+ for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
+ MCSymbol *CleanupSym =
+ getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
+ AddComment("ToState");
+ OS.EmitIntValue(UME.ToState, 4);
+
+ AddComment("Action");
+ OS.EmitValue(create32bitRef(CleanupSym), 4);
}
}
@@ -398,33 +732,49 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitLabel(TryBlockMapXData);
SmallVector<MCSymbol *, 1> HandlerMaps;
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
- WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
- MCSymbol *HandlerMapXData = nullptr;
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = nullptr;
if (!TBME.HandlerArray.empty())
HandlerMapXData =
Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$")
.concat(Twine(I))
.concat("$")
- .concat(ParentLinkageName));
-
+ .concat(FuncLinkageName));
HandlerMaps.push_back(HandlerMapXData);
- int CatchHigh = -1;
- for (WinEHHandlerType &HT : TBME.HandlerArray)
- CatchHigh =
- std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]);
-
- assert(TBME.TryLow <= TBME.TryHigh);
- OS.EmitIntValue(TBME.TryLow, 4); // TryLow
- OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh
- OS.EmitIntValue(CatchHigh, 4); // CatchHigh
- OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches
- OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray
+ // TBMEs should form intervals.
+ assert(0 <= TBME.TryLow && "bad trymap interval");
+ assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval");
+ assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval");
+ assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) &&
+ "bad trymap interval");
+
+ AddComment("TryLow");
+ OS.EmitIntValue(TBME.TryLow, 4);
+
+ AddComment("TryHigh");
+ OS.EmitIntValue(TBME.TryHigh, 4);
+
+ AddComment("CatchHigh");
+ OS.EmitIntValue(TBME.CatchHigh, 4);
+
+ AddComment("NumCatches");
+ OS.EmitIntValue(TBME.HandlerArray.size(), 4);
+
+ AddComment("HandlerArray");
+ OS.EmitValue(create32bitRef(HandlerMapXData), 4);
+ }
+
+ // All funclets use the same parent frame offset currently.
+ unsigned ParentFrameOffset = 0;
+ if (shouldEmitPersonality) {
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF);
}
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
- WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
MCSymbol *HandlerMapXData = HandlerMaps[I];
if (!HandlerMapXData)
continue;
@@ -438,32 +788,34 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
// Get the frame escape label with the offset of the catch object. If
- // the index is -1, then there is no catch object, and we should emit an
- // offset of zero, indicating that no copy will occur.
+ // the index is INT_MAX, then there is no catch object, and we should
+ // emit an offset of zero, indicating that no copy will occur.
const MCExpr *FrameAllocOffsetRef = nullptr;
- if (HT.CatchObjRecoverIdx >= 0) {
- MCSymbol *FrameAllocOffset =
- Asm->OutContext.getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(ParentF->getName()),
- HT.CatchObjRecoverIdx);
- FrameAllocOffsetRef = MCSymbolRefExpr::create(
- FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ if (HT.CatchObj.FrameIndex != INT_MAX) {
+ int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
+ FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
} else {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
}
- OS.EmitIntValue(HT.Adjectives, 4); // Adjectives
- OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type
- OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset
- OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler
+ MCSymbol *HandlerSym =
+ getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
+
+ AddComment("Adjectives");
+ OS.EmitIntValue(HT.Adjectives, 4);
+
+ AddComment("Type");
+ OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4);
+
+ AddComment("CatchObjOffset");
+ OS.EmitValue(FrameAllocOffsetRef, 4);
+
+ AddComment("Handler");
+ OS.EmitValue(create32bitRef(HandlerSym), 4);
if (shouldEmitPersonality) {
- MCSymbol *ParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(HT.Handler->getName()));
- const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create(
- ParentFrameOffset, Asm->OutContext);
- OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ AddComment("ParentFrameOffset");
+ OS.EmitIntValue(ParentFrameOffset, 4);
}
}
}
@@ -475,87 +827,65 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// };
if (IPToStateXData) {
OS.EmitLabel(IPToStateXData);
- for (auto &IPStatePair : FuncInfo.IPToStateList) {
- OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP
- OS.EmitIntValue(IPStatePair.second, 4); // State
+ for (auto &IPStatePair : IPToStateTable) {
+ AddComment("IP");
+ OS.EmitValue(IPStatePair.first, 4);
+ AddComment("ToState");
+ OS.EmitIntValue(IPStatePair.second, 4);
}
}
}
-void WinException::extendIP2StateTable(const MachineFunction *MF,
- const Function *ParentF,
- WinEHFuncInfo &FuncInfo) {
- const Function *F = MF->getFunction();
-
- // The Itanium LSDA table sorts similar landing pads together to simplify the
- // actions table, but we don't need that.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- LandingPads.reserve(PadInfos.size());
- for (const auto &LP : PadInfos)
- LandingPads.push_back(&LP);
-
- RangeMapType PadMap;
- computePadMap(LandingPads, PadMap);
-
- // The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = Asm->getFunctionBegin();
-
- // Whether there is a potentially throwing instruction (currently this means
- // an ordinary call) between the end of the previous try-range and now.
- bool SawPotentiallyThrowing = false;
-
- int LastEHState = -2;
-
- // The parent function and the catch handlers contribute to the 'ip2state'
- // table.
-
- // Include ip2state entries for the beginning of the main function and
- // for catch handler functions.
- if (F == ParentF) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- LastEHState = -1;
- } else if (FuncInfo.HandlerBaseState.count(F)) {
- FuncInfo.IPToStateList.push_back(
- std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F]));
- LastEHState = FuncInfo.HandlerBaseState[F];
- }
- for (const auto &MBB : *MF) {
- for (const auto &MI : MBB) {
- if (!MI.isEHLabel()) {
- if (MI.isCall())
- SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
- continue;
+void WinException::computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) {
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ // Find the end of the funclet
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ break;
}
+ }
- // End of the previous try-range?
- MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
- if (BeginLabel == LastLabel)
- SawPotentiallyThrowing = false;
-
- // Beginning of a new try-range?
- RangeMapType::const_iterator L = PadMap.find(BeginLabel);
- if (L == PadMap.end())
- // Nope, it was just some random label.
- continue;
-
- const PadRange &P = L->second;
- const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
- assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
- "Inconsistent landing pad map!");
-
- // FIXME: Should this be using FuncInfo.HandlerBaseState?
- if (SawPotentiallyThrowing && LastEHState != -1) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- SawPotentiallyThrowing = false;
- LastEHState = -1;
- }
+ // Don't emit ip2state entries for cleanup funclets. Any interesting
+ // exceptional actions in cleanups must be handled in a separate IR
+ // function.
+ if (FuncletStart->isCleanupFuncletEntry())
+ continue;
- if (LandingPad->WinEHState != LastEHState)
- FuncInfo.IPToStateList.push_back(
- std::make_pair(BeginLabel, LandingPad->WinEHState));
- LastEHState = LandingPad->WinEHState;
- LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ MCSymbol *StartLabel;
+ int BaseState;
+ if (FuncletStart == MF->begin()) {
+ BaseState = NullState;
+ StartLabel = Asm->getFunctionBegin();
+ } else {
+ auto *FuncletPad =
+ cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI());
+ assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0);
+ BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second;
+ StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart);
+ }
+ assert(StartLabel && "need local function start label");
+ IPToStateTable.push_back(
+ std::make_pair(create32bitRef(StartLabel), BaseState));
+
+ for (const auto &StateChange : InvokeStateChangeIterator::range(
+ FuncInfo, FuncletStart, FuncletEnd, BaseState)) {
+ // Compute the label to report as the start of this entry; use the EH
+ // start label for the invoke if we have one, otherwise (this is a call
+ // which may unwind to our caller and does not have an EH start label, so)
+ // use the previous end label.
+ const MCSymbol *ChangeLabel = StateChange.NewStartLabel;
+ if (!ChangeLabel)
+ ChangeLabel = StateChange.PreviousEndLabel;
+ // Emit an entry indicating that PCs after 'Label' have this EH state.
+ IPToStateTable.push_back(
+ std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ // FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
}
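(Illustration, not part of the patch: a heavily simplified standalone model of the state transitions that InvokeStateChangeIterator reports for computeIP2StateTable above. The instruction stream is hypothetical; the real code walks MachineInstrs and EH labels.)

  #include <cstdio>
  #include <vector>

  struct Instr {
    bool IsInvokeLabel;  // EH label that opens an invoke range
    int State;           // EH state of that invoke (ignored for plain calls)
  };

  int main() {
    const int NullState = -1;
    std::vector<Instr> Stream = {
        {true, 0},           // invoke in state 0
        {false, NullState},  // ordinary call that may unwind to the caller
        {true, 1},           // invoke in state 1
    };
    int Current = NullState;
    auto Report = [&](int NewState) {
      std::printf("state change: %d -> %d\n", Current, NewState);
      Current = NewState;
    };
    for (const Instr &I : Stream) {
      if (I.IsInvokeLabel && I.State != Current)
        Report(I.State);
      else if (!I.IsInvokeLabel && Current != NullState)
        Report(NullState);  // an unwind-to-caller call ends the current region
    }
    if (Current != NullState)
      Report(NullState);    // iteration always closes back in the null state
    return 0;
  }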
@@ -566,15 +896,15 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
// registration in order to recover the parent frame pointer. Now that we know
// we've code generated the parent, we can emit the label assignment that
// those helpers use to get the offset of the registration node.
- assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
- "no EH reg node localescape index");
+ MCContext &Ctx = Asm->OutContext;
MCSymbol *ParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
- FLinkageName, FuncInfo.EHRegNodeEscapeIndex);
- const MCExpr *RegistrationOffsetSymRef =
- MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef);
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ int64_t Offset = TFI->getFrameIndexReference(
+ *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg);
+ const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
}
/// Emit the language-specific data that _except_handler3 and 4 expect. This is
@@ -585,7 +915,13 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
const Function *F = MF->getFunction();
StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
// Emit the __ehtable label that we use for llvm.x86.seh.lsda.
@@ -611,58 +947,290 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
//
// Only the EHCookieOffset field appears to vary, and it appears to be the
// offset from the final saved SP value to the retaddr.
+ AddComment("GSCookieOffset");
OS.EmitIntValue(-2, 4);
+ AddComment("GSCookieXOROffset");
OS.EmitIntValue(0, 4);
// FIXME: Calculate.
+ AddComment("EHCookieOffset");
OS.EmitIntValue(9999, 4);
+ AddComment("EHCookieXOROffset");
OS.EmitIntValue(0, 4);
BaseState = -2;
}
- // Build a list of pointers to LandingPadInfos and then sort by WinEHState.
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- SmallVector<const LandingPadInfo *, 4> LPads;
- LPads.reserve((PadInfos.size()));
- for (const LandingPadInfo &LPInfo : PadInfos)
- LPads.push_back(&LPInfo);
- std::sort(LPads.begin(), LPads.end(),
- [](const LandingPadInfo *L, const LandingPadInfo *R) {
- return L->WinEHState < R->WinEHState;
- });
-
- // For each action in each lpad, emit one of these:
- // struct ScopeTableEntry {
- // int32_t EnclosingLevel;
- // int32_t (__cdecl *Filter)();
- // void *HandlerOrFinally;
- // };
- //
- // The "outermost" action will use BaseState as its enclosing level. Each
- // other action will refer to the previous state as its enclosing level.
- int CurState = 0;
- for (const LandingPadInfo *LPInfo : LPads) {
- int EnclosingLevel = BaseState;
- assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 ==
- LPInfo->WinEHState &&
- "gaps in the SEH scope table");
- for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend();
- I != E; ++I) {
- const SEHHandler &Handler = *I;
- const BlockAddress *BA = Handler.RecoverBA;
- const Function *F = Handler.FilterOrFinally;
- assert(F && "cannot catch all in 32-bit SEH without filter function");
- const MCExpr *FilterOrNull =
- create32bitRef(BA ? Asm->getSymbol(F) : nullptr);
- const MCExpr *ExceptOrFinally = create32bitRef(
- BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F));
-
- OS.EmitIntValue(EnclosingLevel, 4);
- OS.EmitValue(FilterOrNull, 4);
- OS.EmitValue(ExceptOrFinally, 4);
-
- // The next state unwinds to this state.
- EnclosingLevel = CurState;
- CurState++;
+ assert(!FuncInfo.SEHUnwindMap.empty());
+ for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ const MCSymbol *ExceptOrFinally =
+ UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
+ // -1 is usually the base state for "unwind to caller", but for
+ // _except_handler4 it's -2. Do that replacement here if necessary.
+ int ToState = UME.ToState == -1 ? BaseState : UME.ToState;
+ AddComment("ToState");
+ OS.EmitIntValue(ToState, 4);
+ AddComment(UME.IsFinally ? "Null" : "FilterFunction");
+ OS.EmitValue(create32bitRef(UME.Filter), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler");
+ OS.EmitValue(create32bitRef(ExceptOrFinally), 4);
+ }
+}
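As a side note (illustrative only, not from the patch): each SEH scope-table entry records the state it unwinds to (ToState), with -1 (or -2 for _except_handler4) meaning "unwind to the caller", so following that chain from any state enumerates the enclosing handlers from innermost to outermost. A minimal sketch with an invented name, enclosingStates:

#include <vector>

// Walk the ToState chain from State; negative values (-1/-2) terminate the
// chain because they mean "unwind to caller".
std::vector<int> enclosingStates(const std::vector<int> &ToState, int State) {
  std::vector<int> Chain;
  while (State >= 0) {
    Chain.push_back(State);
    State = ToState[State]; // hop to the state this entry unwinds to
  }
  return Chain;
}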
+
+static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
+ int Rank = 0;
+ while (State != -1) {
+ ++Rank;
+ State = FuncInfo.ClrEHUnwindMap[State].Parent;
+ }
+ return Rank;
+}
+
+static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
+ int LeftRank = getRank(FuncInfo, Left);
+ int RightRank = getRank(FuncInfo, Right);
+
+ while (LeftRank < RightRank) {
+ Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ --RightRank;
+ }
+
+ while (RightRank < LeftRank) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
+ --LeftRank;
+ }
+
+ while (Left != Right) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
+ Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ }
+
+ return Left;
+}
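getRank/getAncestor above compute the nearest common ancestor of two EH states by first lifting the deeper state to the same depth and then walking both up in lockstep. The same technique over a generic parent array is sketched below; depth and commonAncestor are invented names for this example, and a well-formed forest (no cycles, -1 as the root's parent) is assumed.

#include <vector>

// Depth of node N in a forest encoded as a parent array (-1 = no parent).
static int depth(const std::vector<int> &Parent, int N) {
  int D = 0;
  for (; N != -1; N = Parent[N])
    ++D;
  return D;
}

// Nearest common ancestor of A and B, or -1 if they share no ancestor.
int commonAncestor(const std::vector<int> &Parent, int A, int B) {
  int DA = depth(Parent, A), DB = depth(Parent, B);
  while (DA > DB) { A = Parent[A]; --DA; }  // lift the deeper node first
  while (DB > DA) { B = Parent[B]; --DB; }
  while (A != B) {                          // then walk up in lockstep
    A = Parent[A];
    B = Parent[B];
  }
  return A;
}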
+
+void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
+ // CLR EH "states" are really just IDs that identify handlers/funclets;
+ // states, handlers, and funclets all have 1:1 mappings between them, and a
+ // handler/funclet's "state" is its index in the ClrEHUnwindMap.
+ MCStreamer &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ MCSymbol *FuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *FuncEndSym = Asm->getFunctionEnd();
+
+ // A ClrClause describes a protected region.
+ struct ClrClause {
+ const MCSymbol *StartLabel; // Start of protected region
+ const MCSymbol *EndLabel; // End of protected region
+ int State; // Index of handler protecting the protected region
+ int EnclosingState; // Index of funclet enclosing the protected region
+ };
+ SmallVector<ClrClause, 8> Clauses;
+
+ // Build a map from handler MBBs to their corresponding states (i.e. their
+ // indices in the ClrEHUnwindMap).
+ int NumStates = FuncInfo.ClrEHUnwindMap.size();
+ assert(NumStates > 0 && "Don't need exception table!");
+ DenseMap<const MachineBasicBlock *, int> HandlerStates;
+ for (int State = 0; State < NumStates; ++State) {
+ MachineBasicBlock *HandlerBlock =
+ FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
+ HandlerStates[HandlerBlock] = State;
+ // Use this loop through all handlers to verify our assumption (used in
+ // the MinEnclosingState computation) that ancestors have lower state
+ // numbers than their descendants.
+ assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
+ "ill-formed state numbering");
+ }
+ // Map the main function to the NullState.
+ HandlerStates[&MF->front()] = NullState;
+
+ // Write out a sentinel indicating the end of the standard (Windows) xdata
+ // and the start of the additional (CLR) info.
+ OS.EmitIntValue(0xffffffff, 4);
+ // Write out the number of funclets
+ OS.EmitIntValue(NumStates, 4);
+
+ // Walk the machine blocks/instrs, computing and emitting a few things:
+ // 1. Emit a list of the offsets to each handler entry, in lexical order.
+ // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end.
+ // 3. Compute the list of ClrClauses, in the required order (inner before
+ // outer, earlier before later; the order by which a forward scan with
+ // early termination will find the innermost enclosing clause covering
+ // a given address).
+ // 4. A map (MinClauseMap) from each handler index to the index of the
+ // outermost funclet/function which contains a try clause targeting the
+ // key handler. This will be used to determine IsDuplicate-ness when
+ // emitting ClrClauses. The NullState value is used to indicate that the
+ // top-level function contains a try clause targeting the key handler.
+ // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for
+ // try regions we entered before entering the PendingState try but which
+ // we haven't yet exited.
+ SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack;
+ // EndSymbolMap and MinClauseMap are maps described above.
+ std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]);
+ SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
+
+ // Visit the root function and each funclet.
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ int FuncletState = HandlerStates[&*FuncletStart];
+ // Find the end of the funclet
+ MCSymbol *EndSymbol = FuncEndSym;
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd);
+ break;
+ }
}
+ // Emit the function/funclet end and, if this is a funclet (and not the
+ // root function), record it in the EndSymbolMap.
+ OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4);
+ if (FuncletState != NullState) {
+ // Record the end of the handler.
+ EndSymbolMap[FuncletState] = EndSymbol;
+ }
+
+ // Walk the state changes in this function/funclet and compute its clauses.
+ // Funclets always start in the null state.
+ const MCSymbol *CurrentStartLabel = nullptr;
+ int CurrentState = NullState;
+ assert(HandlerStack.empty());
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
+ // Close any try regions we're not still under
+ int AncestorState =
+ getAncestor(FuncInfo, CurrentState, StateChange.NewState);
+ while (CurrentState != AncestorState) {
+ assert(CurrentState != NullState && "Failed to find ancestor!");
+ // Close the pending clause
+ Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
+ CurrentState, FuncletState});
+ // Now the parent handler is current
+ CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
+ // Pop the new start label from the handler stack if we've exited all
+ // descendants of the corresponding handler.
+ if (HandlerStack.back().second == CurrentState)
+ CurrentStartLabel = HandlerStack.pop_back_val().first;
+ }
+
+ if (StateChange.NewState != CurrentState) {
+ // For each clause we're starting, update the MinClauseMap so we can
+ // know which is the topmost funclet containing a clause targeting
+ // it.
+ for (int EnteredState = StateChange.NewState;
+ EnteredState != CurrentState;
+ EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
+ int &MinEnclosingState = MinClauseMap[EnteredState];
+ if (FuncletState < MinEnclosingState)
+ MinEnclosingState = FuncletState;
+ }
+ // Save the previous current start label/state on the stack and update
+ // to the newly-current start label/state.
+ HandlerStack.emplace_back(CurrentStartLabel, CurrentState);
+ CurrentStartLabel = StateChange.NewStartLabel;
+ CurrentState = StateChange.NewState;
+ }
+ }
+ assert(HandlerStack.empty());
+ }
+
+ // Now emit the clause info, starting with the number of clauses.
+ OS.EmitIntValue(Clauses.size(), 4);
+ for (ClrClause &Clause : Clauses) {
+ // Emit a CORINFO_EH_CLAUSE :
+ /*
+ struct CORINFO_EH_CLAUSE
+ {
+ CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag
+ DWORD TryOffset;
+ DWORD TryLength; // actually TryEndOffset
+ DWORD HandlerOffset;
+ DWORD HandlerLength; // actually HandlerEndOffset
+ union
+ {
+ DWORD ClassToken; // use for catch clauses
+ DWORD FilterOffset; // use for filter clauses
+ };
+ };
+
+ enum CORINFO_EH_CLAUSE_FLAGS
+ {
+ CORINFO_EH_CLAUSE_NONE = 0,
+ CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter
+ CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause
+ CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause
+ };
+ typedef enum CorExceptionFlag
+ {
+ COR_ILEXCEPTION_CLAUSE_NONE,
+ COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause
+ COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause
+ COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause
+ COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This
+ // clause was duplicated
+ // to a funclet which was
+ // pulled out of line
+ } CorExceptionFlag;
+ */
+ // Add 1 to the start/end of the EH clause; the IP associated with a
+ // call when the runtime does its scan is the IP of the next instruction
+ // (the one to which control will return after the call), so we need
+ // to add 1 to the end of the clause to cover that offset. We also add
+ // 1 to the start of the clause to make sure that the ranges reported
+ // for all clauses are disjoint. Note that we'll need some additional
+ // logic when machine traps are supported, since in that case the IP
+ // that the runtime uses is the offset of the faulting instruction
+ // itself; if such an instruction immediately follows a call but the
+ // two belong to different clauses, we'll need to insert a nop between
+ // them so the runtime can distinguish the point to which the call will
+ // return from the point at which the fault occurs.
+
+ const MCExpr *ClauseBegin =
+ getOffsetPlusOne(Clause.StartLabel, FuncBeginSym);
+ const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
+
+ const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
+ MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>();
+ MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
+ const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
+ MCSymbol *EndSym = EndSymbolMap[Clause.State];
+ const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym);
+
+ uint32_t Flags = 0;
+ switch (Entry.HandlerType) {
+ case ClrHandlerType::Catch:
+ // Leaving bits 0-2 clear indicates catch.
+ break;
+ case ClrHandlerType::Filter:
+ Flags |= 1;
+ break;
+ case ClrHandlerType::Finally:
+ Flags |= 2;
+ break;
+ case ClrHandlerType::Fault:
+ Flags |= 4;
+ break;
+ }
+ if (Clause.EnclosingState != MinClauseMap[Clause.State]) {
+ // This is a "duplicate" clause; the handler needs to be entered from a
+ // frame above the one holding the invoke.
+ assert(Clause.EnclosingState > MinClauseMap[Clause.State]);
+ Flags |= 8;
+ }
+ OS.EmitIntValue(Flags, 4);
+
+ // Write the clause start/end
+ OS.EmitValue(ClauseBegin, 4);
+ OS.EmitValue(ClauseEnd, 4);
+
+ // Write out the handler start/end
+ OS.EmitValue(HandlerBegin, 4);
+ OS.EmitValue(HandlerEnd, 4);
+
+ // Write out the type token or filter offset
+ assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters");
+ OS.EmitIntValue(Entry.TypeToken, 4);
}
}
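For readers of the clause-ordering comment above (inner before outer, earlier before later): that ordering exists so a consumer can find the innermost clause covering an address with a forward scan that stops at the first hit. The following is a simplified sketch of that scan, not code from the patch; Clause and findInnermostClause are invented names and the record is reduced to the fields the scan needs.

#include <cstdint>
#include <vector>

struct Clause {
  uint32_t TryStart, TryEnd; // function-relative range protected by the clause
  int HandlerState;          // which handler the clause targets
};

// With clauses ordered inner-before-outer and earlier-before-later, the first
// clause whose try range covers Offset is the innermost enclosing one.
int findInnermostClause(const std::vector<Clause> &Clauses, uint32_t Offset) {
  for (size_t I = 0; I != Clauses.size(); ++I)
    if (Clauses[I].TryStart <= Offset && Offset < Clauses[I].TryEnd)
      return static_cast<int>(I); // first match == innermost enclosing clause
  return -1;                      // not inside any protected region
}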
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index 669c9cc..acb3010 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -21,6 +21,7 @@ class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
+class Value;
struct WinEHFuncInfo;
class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
@@ -36,7 +37,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if this is a 64-bit target and we should use image relative offsets.
bool useImageRel32 = false;
- void emitCSpecificHandlerTable();
+ /// Pointer to the current funclet entry BB.
+ const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+
+ void emitCSpecificHandlerTable(const MachineFunction *MF);
+
+ void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State);
/// Emit the EH table data for 32-bit and 64-bit functions using
/// the __CxxFrameHandler3 personality.
@@ -47,8 +55,11 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// tables.
void emitExceptHandlerTable(const MachineFunction *MF);
- void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF,
- WinEHFuncInfo &FuncInfo);
+ void emitCLRExceptionTable(const MachineFunction *MF);
+
+ void computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
/// Emits the label used with llvm.x86.seh.recoverfp, which is used by
/// outlined funclets.
@@ -57,6 +68,16 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+ const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom);
+
+ /// Gets the offset that we should use in a table for a stack object with the
+ /// given index. For targets using CFI (Win64, etc), this is relative to the
+ /// established SP at the end of the prologue. For targets without CFI (Win32
+ /// only), it is relative to the frame pointer.
+ int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
public:
//===--------------------------------------------------------------------===//
@@ -74,6 +95,10 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
+
+ /// \brief Emit target-specific EH funclet machinery.
+ void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+ void endFunclet() override;
};
}
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 530ab46..d12fdb2 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,14 @@
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// target-specific instructions which implement the same semantics in a way
+// which better fits the target backend. This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +24,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -44,13 +49,17 @@ namespace {
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
- bool expandAtomicLoad(LoadInst *LI);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
- bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+ bool expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -108,7 +117,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
(isAtLeastRelease(CASI->getSuccessOrdering()) ||
isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -126,10 +135,28 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
}
- if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
- MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
- MadeChange |= expandAtomicStore(SI);
+ if (LI) {
+ if (LI->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ LI = convertAtomicLoadToIntegerType(LI);
+ assert(LI->getType()->isIntegerTy() && "invariant broken");
+ MadeChange = true;
+ }
+
+ MadeChange |= tryExpandAtomicLoad(LI);
+ } else if (SI) {
+ if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ SI = convertAtomicStoreToIntegerType(SI);
+ assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
+
+ if (TLI->shouldExpandAtomicStoreInIR(SI))
+ MadeChange |= expandAtomicStore(SI);
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
@@ -141,7 +168,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
} else {
MadeChange |= tryExpandAtomicRMW(RMWI);
}
- } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+ } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
@@ -169,11 +196,56 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
return (LeadingFence || TrailingFence);
}
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- if (TLI->hasLoadLinkedStoreConditional())
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+ const DataLayout &DL) {
+ EVT VT = TLI->getValueType(DL, T);
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+ return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+ auto *M = LI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+ M->getDataLayout());
+
+ IRBuilder<> Builder(LI);
+
+ Value *Addr = LI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ auto *NewLI = Builder.CreateLoad(NewAddr);
+ NewLI->setAlignment(LI->getAlignment());
+ NewLI->setVolatile(LI->isVolatile());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+ Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+ LI->replaceAllUsesWith(NewVal);
+ LI->eraseFromParent();
+ return NewLI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(
+ LI, LI->getPointerOperand(), LI->getOrdering(),
+ [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+ case TargetLoweringBase::AtomicExpansionKind::LLOnly:
return expandAtomicLoadToLL(LI);
- else
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -184,6 +256,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
Value *Val =
TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
LI->replaceAllUsesWith(Val);
LI->eraseFromParent();
@@ -209,6 +282,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
return true;
}
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all. The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store. The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+ IRBuilder<> Builder(SI);
+ auto *M = SI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+ M->getDataLayout());
+ Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+ Value *Addr = SI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+ NewSI->setAlignment(SI->getAlignment());
+ NewSI->setVolatile(SI->isVolatile());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+ SI->eraseFromParent();
+ return NewSI;
+}
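The bitcast idiom described above can be pictured at the source level: an atomic store of a float is expressed as an atomic store of the 32-bit integer carrying the same bit pattern. The sketch below is an analogy in plain C++ (the pass itself rewrites LLVM IR, not C++); atomicStoreFloat and atomicLoadFloat are invented names.

#include <atomic>
#include <cstdint>
#include <cstring>

void atomicStoreFloat(std::atomic<uint32_t> &Slot, float V) {
  uint32_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // reinterpret the float's bits
  Slot.store(Bits, std::memory_order_seq_cst);
}

float atomicLoadFloat(const std::atomic<uint32_t> &Slot) {
  uint32_t Bits = Slot.load(std::memory_order_seq_cst);
  float V;
  std::memcpy(&V, &Bits, sizeof(V));    // reconstruct the float from its bits
  return V;
}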
+
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
@@ -226,23 +328,15 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
- case TargetLoweringBase::AtomicRMWExpansionKind::None:
- return false;
- case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
- assert(TLI->hasLoadLinkedStoreConditional() &&
- "TargetLowering requested we expand AtomicRMW instruction into "
- "load-linked/store-conditional combos, but such instructions aren't "
- "supported");
-
- return expandAtomicRMWToLLSC(AI);
- }
- case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
- return expandAtomicRMWToCmpXchg(AI);
- }
- }
- llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ Value* Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -282,10 +376,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
}
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+ [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(),
+ Builder, Loaded,
+ AI->getValOperand());
+ });
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+ return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+ }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ BasicBlock *BB = I->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
@@ -303,11 +415,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
// atomicrmw.end:
// fence?
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
+ // This grabs the DebugLoc from I.
+ IRBuilder<> Builder(I);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
@@ -320,8 +432,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
Builder.SetInsertPoint(LoopBB);
Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+ Value *NewVal = PerformOp(Builder, Loaded);
Value *StoreSuccess =
TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
@@ -331,72 +442,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // %init_loaded = load atomic iN* %addr
- // br label %loop
- // loop:
- // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
- // %new = some_op iN %loaded, %incr
- // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
- // %new_loaded = extractvalue { iN, i1 } %pair, 0
- // %success = extractvalue { iN, i1 } %pair, 1
- // br i1 %success, label %atomicrmw.end, label %loop
- // atomicrmw.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we want a load. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
- // Atomics require at least natural alignment.
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
- Loaded->addIncoming(InitLoaded, BB);
-
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
- Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, MemOpOrder,
- AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
- Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
- Loaded->addIncoming(NewLoaded, LoopBB);
-
- Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
- Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-
- AI->replaceAllUsesWith(NewLoaded);
- AI->eraseFromParent();
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
return true;
}
@@ -424,7 +471,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
+ // label %cmpxchg.nostore
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
@@ -432,6 +479,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
@@ -440,9 +490,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
@@ -466,7 +517,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
@@ -482,6 +533,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
/*IsLoad=*/true);
Builder.CreateBr(ExitBB);
+ Builder.SetInsertPoint(NoStoreBB);
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
Builder.SetInsertPoint(FailureBB);
TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
/*IsLoad=*/true);
@@ -556,9 +614,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
- if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
- expandAtomicLoad(ResultingLoad);
+ tryExpandAtomicLoad(ResultingLoad);
return true;
}
return false;
}
+
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ assert(AI);
+
+ AtomicOrdering MemOpOrder =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ // Atomics require at least natural alignment.
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *NewLoaded = nullptr;
+ Value *Success = nullptr;
+
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+ Success, NewLoaded);
+ assert(Success && NewLoaded);
+
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}
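The loop structure documented in the comment above has a familiar source-level counterpart: load once, apply the operation, and retry a compare-exchange until it succeeds. A minimal sketch with std::atomic is shown below purely for illustration; fetch_add_via_cas is an invented name and stands in for whatever operation performAtomicOp would produce.

#include <atomic>

int fetch_add_via_cas(std::atomic<int> &A, int Incr) {
  int Loaded = A.load(std::memory_order_relaxed); // %init_loaded
  int NewVal;
  do {
    NewVal = Loaded + Incr;                       // some_op %loaded, %incr
    // On failure, compare_exchange_weak refreshes Loaded with the value it
    // observed, mirroring the %new_loaded PHI feeding back into the loop.
  } while (!A.compare_exchange_weak(Loaded, NewVal,
                                    std::memory_order_seq_cst,
                                    std::memory_order_seq_cst));
  return Loaded; // value observed before the successful update
}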
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index db00910..a67e194 100644
--- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -33,6 +33,6 @@ cl::opt<unsigned>
cl::desc("Threshold for partial unrolling"),
cl::Hidden);
-BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F)
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 6182667..604feed 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -12,7 +12,8 @@
// it then removes.
//
// Note that this pass must be run after register allocation, it cannot handle
-// SSA form.
+// SSA form. It also must handle virtual registers for targets that emit a
+// virtual ISA (e.g. NVPTX).
//
//===----------------------------------------------------------------------===//
@@ -20,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -95,7 +97,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
- PassConfig->getEnableTailMerge();
+ PassConfig->getEnableTailMerge();
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineBranchProbabilityInfo>());
@@ -132,6 +134,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
+ FuncletMembership.erase(MBB);
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
@@ -150,9 +153,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
if (!I->isImplicitDef())
break;
unsigned Reg = I->getOperand(0).getReg();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ImpDefRegs.insert(*SubRegs);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ } else {
+ ImpDefRegs.insert(Reg);
+ }
++I;
}
if (ImpDefRegs.empty())
@@ -163,8 +170,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
if (!TII->isUnpredicatedTerminator(I))
return false;
// See if it uses any of the implicitly defined registers.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
+ for (const MachineOperand &MO : I->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -208,14 +214,17 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
- MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr;
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
- MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- MadeChange |= OptimizeImpDefsBlock(MBB);
+ if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(&MBB);
}
+ // Recalculate funclet membership.
+ FuncletMembership = getFuncletMembership(MF);
+
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
@@ -235,12 +244,9 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
- BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I)
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- MachineOperand &Op = I->getOperand(op);
+ for (const MachineBasicBlock &BB : MF) {
+ for (const MachineInstr &I : BB)
+ for (const MachineOperand &Op : I.operands()) {
if (!Op.isJTI()) continue;
// Remember that this JT is live.
@@ -365,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
// Back past possible debugging pseudos at beginning of block. This matters
// when one block differs from the other only by whether debugging pseudos
- // are present at the beginning. (This way, the various checks later for
+ // are present at the beginning. (This way, the various checks later for
// I1==MBB1->begin() work as expected.)
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
@@ -426,7 +432,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineFunction &MF = *CurMBB.getParent();
// Create the fall-through block.
- MachineFunction::iterator MBBI = &CurMBB;
+ MachineFunction::iterator MBBI = CurMBB.getIterator();
MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB);
CurMBB.getParent()->insert(++MBBI, NewMBB);
@@ -445,6 +451,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// For targets that use the register scavenger, we must maintain LiveIns.
MaintainLiveIns(&CurMBB, NewMBB);
+ // Add the new block to the funclet.
+ const auto &FuncletI = FuncletMembership.find(&CurMBB);
+ if (FuncletI != FuncletMembership.end())
+ FuncletMembership[NewMBB] = FuncletI->second;
+
return NewMBB;
}
@@ -479,7 +490,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
DebugLoc dl; // FIXME: this is nowhere
if (I != MF->end() &&
!TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
- MachineBasicBlock *NextBB = I;
+ MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
@@ -549,14 +560,23 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
-static bool ProfitableToMerge(MachineBasicBlock *MBB1,
- MachineBasicBlock *MBB2,
- unsigned minCommonTailLength,
- unsigned &CommonTailLen,
- MachineBasicBlock::iterator &I1,
- MachineBasicBlock::iterator &I2,
- MachineBasicBlock *SuccBB,
- MachineBasicBlock *PredBB) {
+static bool
+ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength, unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ // It is never profitable to tail-merge blocks from two different funclets.
+ if (!FuncletMembership.empty()) {
+ auto Funclet1 = FuncletMembership.find(MBB1);
+ assert(Funclet1 != FuncletMembership.end());
+ auto Funclet2 = FuncletMembership.find(MBB2);
+ assert(Funclet2 != FuncletMembership.end());
+ if (Funclet1->second != Funclet2->second)
+ return false;
+ }
+
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
if (CommonTailLen == 0)
return false;
@@ -600,12 +620,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- (I1 == MBB1->begin() || I2 == MBB2->begin()))
- return true;
-
- return false;
+ return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin());
}
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
@@ -634,7 +650,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
minCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
- SuccBB, PredBB)) {
+ SuccBB, PredBB,
+ FuncletMembership)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -776,7 +793,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
- MBBICommon->clearMemRefs();
+ MBBICommon->dropMemRefs();
++MBBI;
++MBBICommon;
@@ -840,8 +857,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
- getParent()->begin();
+ MachineBasicBlock *EntryBB =
+ &MergePotentials.front().getBlock()->getParent()->front();
unsigned commonTailIndex = SameTails.size();
// If there are two blocks, check to see if one can be made to fall through
// into the other.
@@ -917,12 +934,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// First find blocks with no successors.
MergePotentials.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
- if (TriedMerging.count(I))
- continue;
- if (I->succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB));
}
// If this is a large problem, avoid visiting the same basic blocks
@@ -958,13 +974,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
I != E; ++I) {
if (I->pred_size() < 2) continue;
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
- MachineBasicBlock *IBB = I;
- MachineBasicBlock *PredBB = std::prev(I);
+ MachineBasicBlock *IBB = &*I;
+ MachineBasicBlock *PredBB = &*std::prev(I);
MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
- E2 = I->pred_end();
- P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
- MachineBasicBlock *PBB = *P;
+ for (MachineBasicBlock *PBB : I->predecessors()) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+
if (TriedMerging.count(PBB))
continue;
@@ -977,7 +993,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// Skip blocks which may jump to a landing pad. Can't tail merge these.
- if (PBB->getLandingPadSuccessor())
+ if (PBB->hasEHPadSuccessor())
continue;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -990,18 +1006,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
- if (!FBB)
- FBB = std::next(MachineFunction::iterator(PBB));
+ if (!FBB) {
+ auto Next = ++PBB->getIterator();
+ if (Next != MF.end())
+ FBB = &*Next;
+ }
}
// Failing case: the only way IBB can be reached from PBB is via
// exception handling. Happens for landing pads. Would be nice to have
// a bit in the edge so we didn't have to do all this.
- if (IBB->isLandingPad()) {
- MachineFunction::iterator IP = PBB; IP++;
+ if (IBB->isEHPad()) {
+ MachineFunction::iterator IP = ++PBB->getIterator();
MachineBasicBlock *PredNextBB = nullptr;
if (IP != MF.end())
- PredNextBB = IP;
+ PredNextBB = &*IP;
if (!TBB) {
if (IBB != PredNextBB) // fallthrough
continue;
@@ -1027,7 +1046,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB));
}
}
@@ -1042,7 +1061,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Reinsert an unconditional branch if needed. The 1 below can occur as a
// result of removing blocks in TryTailMergeBlocks.
- PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks
+ PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -1080,13 +1099,19 @@ void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
if (TailMBB.succ_size() <= 1)
return;
- auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end());
- uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1;
+ auto SumEdgeFreq =
+ std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))
+ .getFrequency();
auto EdgeFreq = EdgeFreqLs.begin();
- for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
- SuccI != SuccE; ++SuccI, ++EdgeFreq)
- TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale);
+ if (SumEdgeFreq > 0) {
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq) {
+ auto Prob = BranchProbability::getBranchProbability(
+ EdgeFreq->getFrequency(), SumEdgeFreq);
+ TailMBB.setSuccProbability(SuccI, Prob);
+ }
+ }
}
//===----------------------------------------------------------------------===//
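The replacement code in the hunk above switches from scaling edge weights by the maximum frequency to deriving each successor's probability as its edge frequency divided by the sum of all edge frequencies, leaving the probabilities untouched when the sum is zero. A small sketch of that normalization, with the invented name normalizeEdgeFreqs, representing each probability as a numerator/denominator pair:

#include <cstdint>
#include <utility>
#include <vector>

std::vector<std::pair<uint64_t, uint64_t>>
normalizeEdgeFreqs(const std::vector<uint64_t> &Freqs) {
  uint64_t Sum = 0;
  for (uint64_t F : Freqs)
    Sum += F;
  std::vector<std::pair<uint64_t, uint64_t>> Probs;
  if (Sum == 0)
    return Probs;                // matches the guard: leave weights untouched
  for (uint64_t F : Freqs)
    Probs.push_back({F, Sum});   // probability = F / Sum
  return Probs;
}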
@@ -1098,10 +1123,12 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Make sure blocks are numbered in order
MF.RenumberBlocks();
+ // Renumbering blocks alters funclet membership, recalculate it.
+ FuncletMembership = getFuncletMembership(MF);
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
MadeChange |= OptimizeBlock(MBB);
// If it is dead, remove it.
@@ -1111,6 +1138,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
++NumDeadBlocks;
}
}
+
return MadeChange;
}
@@ -1167,20 +1195,31 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineFunction &MF = *MBB->getParent();
ReoptimizeBlock:
- MachineFunction::iterator FallThrough = MBB;
+ MachineFunction::iterator FallThrough = MBB->getIterator();
++FallThrough;
+ // Make sure MBB and FallThrough belong to the same funclet.
+ bool SameFunclet = true;
+ if (!FuncletMembership.empty() && FallThrough != MF.end()) {
+ auto MBBFunclet = FuncletMembership.find(MBB);
+ assert(MBBFunclet != FuncletMembership.end());
+ auto FallThroughFunclet = FuncletMembership.find(&*FallThrough);
+ assert(FallThroughFunclet != FuncletMembership.end());
+ SameFunclet = MBBFunclet->second == FallThroughFunclet->second;
+ }
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
// optimized away.
- if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
+ SameFunclet) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
- } else if (FallThrough->isLandingPad()) {
+ } else if (FallThrough->isEHPad()) {
// Don't rewrite to a landing pad fallthrough. That could lead to the case
// where a BB jumps to more than one landing pad.
// TODO: Is it ever worth rewriting predecessors which don't already
@@ -1190,12 +1229,12 @@ ReoptimizeBlock:
// instead.
while (!MBB->pred_empty()) {
MachineBasicBlock *Pred = *(MBB->pred_end()-1);
- Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
}
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
- MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);
MadeChange = true;
}
return MadeChange;
@@ -1237,7 +1276,7 @@ ReoptimizeBlock:
// AnalyzeBranch.
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
PrevBB.succ_size() == 1 &&
- !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
DEBUG(dbgs() << "\nMerging into block: " << PrevBB
<< "From MBB: " << *MBB);
// Remove redundant DBG_VALUEs first.
@@ -1333,7 +1372,7 @@ ReoptimizeBlock:
TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
// Move this block to the end of the function.
- MBB->moveAfter(--MF.end());
+ MBB->moveAfter(&MF.back());
MadeChange = true;
++NumBranchOpts;
return MadeChange;
@@ -1371,7 +1410,7 @@ ReoptimizeBlock:
// other blocks across it.
if (CurTBB && CurCond.empty() && !CurFBB &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
- !MBB->hasAddressTaken()) {
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
@@ -1468,14 +1507,11 @@ ReoptimizeBlock:
// see if it has a fall-through into its successor.
bool CurFallsThru = MBB->canFallThrough();
- if (!MBB->isLandingPad()) {
+ if (!MBB->isEHPad()) {
// Check all the predecessors of this block. If one of them has no fall
// throughs, move this block right after it.
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- E = MBB->pred_end(); PI != E; ++PI) {
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) {
// Analyze the branch at the end of the pred.
- MachineBasicBlock *PredBB = *PI;
- MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (PredBB != MBB && !PredBB->canFallThrough() &&
@@ -1493,8 +1529,7 @@ ReoptimizeBlock:
// B elsewhere
// next:
if (CurFallsThru) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
CurCond.clear();
TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
}
@@ -1507,11 +1542,9 @@ ReoptimizeBlock:
if (!CurFallsThru) {
// Check all successors to see if we can move this block before it.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock *SuccBB : MBB->successors()) {
// Analyze the branch at the end of the block before the succ.
- MachineBasicBlock *SuccBB = *SI;
- MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+ MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
// If this block doesn't already fall-through to that successor, and if
// the succ doesn't already have a block that can fall through into it,
@@ -1519,7 +1552,7 @@ ReoptimizeBlock:
// fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB &&
!SuccPrev->canFallThrough() && !CurUnAnalyzable &&
- !SuccBB->isLandingPad()) {
+ !SuccBB->isEHPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
goto ReoptimizeBlock;
@@ -1531,10 +1564,18 @@ ReoptimizeBlock:
// removed, move this block to the end of the function.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
+ // We're looking for cases where PrevBB could possibly fall through to
+ // FallThrough, but if FallThrough is an EH pad that wouldn't be useful,
+ // so here we skip over any EH pads to give ourselves a chance to find
+ // a branch target from PrevBB.
+ while (FallThrough != MF.end() && FallThrough->isEHPad())
+ ++FallThrough;
+ // Now check to see if the current block is sitting between PrevBB and
+ // a block to which it could fall through.
if (FallThrough != MF.end() &&
!TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
- PrevBB.isSuccessor(FallThrough)) {
- MBB->moveAfter(--MF.end());
+ PrevBB.isSuccessor(&*FallThrough)) {
+ MBB->moveAfter(&MF.back());
MadeChange = true;
return MadeChange;
}
@@ -1553,7 +1594,7 @@ ReoptimizeBlock:
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
MadeChange |= HoistCommonCodeInSuccs(MBB);
}
@@ -1564,15 +1605,23 @@ bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
/// its 'true' successor.
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
MachineBasicBlock *TrueBB) {
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- E = BB->succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock *SuccBB : BB->successors())
if (SuccBB != TrueBB)
return SuccBB;
- }
return nullptr;
}
+template <class Container>
+static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
+ Container &Set) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Set.insert(*AI);
+ } else {
+ Set.insert(Reg);
+ }
+}
+
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
/// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous
@@ -1590,16 +1639,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!TII->isUnpredicatedTerminator(Loc))
return MBB->end();
- for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = Loc->getOperand(i);
+ for (const MachineOperand &MO : Loc->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1608,8 +1655,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// If the terminator defines a register, make sure we don't hoist
// the instruction whose def might be clobbered by the terminator.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1626,8 +1672,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
--PI;
bool IsDef = false;
- for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
- const MachineOperand &MO = PI->getOperand(i);
+ for (const MachineOperand &MO : PI->operands()) {
// If PI has a regmask operand, it is probably a call. Separate away.
if (MO.isRegMask())
return Loc;
@@ -1636,8 +1681,10 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (Uses.count(Reg))
+ if (Uses.count(Reg)) {
IsDef = true;
+ break;
+ }
}
if (!IsDef)
// The condition setting instruction is not just before the conditional
@@ -1657,23 +1704,22 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// Find out what registers are live. Note this routine is ignoring other live
// registers which are only used by instructions in successor blocks.
- for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = PI->getOperand(i);
+ for (const MachineOperand &MO : PI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
}
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1737,8 +1783,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
bool IsSafe = true;
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (MachineOperand &MO : TIB->operands()) {
// Don't attempt to hoist instructions with register masks.
if (MO.isRegMask()) {
IsSafe = false;
@@ -1793,28 +1838,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
// Remove kills from LocalDefsSet, these registers had short live ranges.
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.erase(*AI);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ } else {
+ LocalDefsSet.erase(Reg);
+ }
}
// Track local defs so we can update liveins.
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, LocalDefsSet);
}
HasDups = true;
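The BranchFolding hunks above funnel their register bookkeeping through the new addRegAndItsAliases helper: a physical register is inserted together with every alias, while a virtual register is inserted on its own. A minimal standalone sketch of that idea follows, assuming a toy alias table in place of MCRegAliasIterator and an invented isPhysical() cutoff; both are hypothetical stand-ins, not LLVM APIs.

#include <cassert>
#include <map>
#include <set>
#include <vector>

// Assumption for the sketch: registers below 256 are "physical".
static bool isPhysical(unsigned Reg) { return Reg < 256; }

// Insert Reg, and for physical registers also every alias, into Set.
static void addRegAndItsAliases(unsigned Reg,
                                const std::map<unsigned, std::vector<unsigned>> &Aliases,
                                std::set<unsigned> &Set) {
  Set.insert(Reg);
  if (!isPhysical(Reg))
    return;                                  // virtual registers have no aliases
  auto It = Aliases.find(Reg);
  if (It != Aliases.end())
    Set.insert(It->second.begin(), It->second.end());
}

int main() {
  std::map<unsigned, std::vector<unsigned>> Aliases{{1, {2, 3}}}; // e.g. AX -> {AH, AL}
  std::set<unsigned> Uses;
  addRegAndItsAliases(1, Aliases, Uses);     // physical: inserts 1, 2 and 3
  addRegAndItsAliases(1000, Aliases, Uses);  // virtual: inserts just 1000
  assert(Uses.size() == 4);
  return 0;
}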
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 46c05dc..d759d53 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -54,6 +54,7 @@ namespace llvm {
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
class SameTailElt {
MPIterator MPIter;
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index d08fae0..abc655a 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -24,6 +25,7 @@ using namespace llvm;
void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
MachineFunction &MF,
+ VirtRegMap *VRM,
const MachineLoopInfo &MLI,
const MachineBlockFrequencyInfo &MBFI,
VirtRegAuxInfo::NormalizingFn norm) {
@@ -31,7 +33,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
<< "********** Function: " << MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
- VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm);
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
@@ -74,7 +76,10 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
// Check if all values in LI are rematerializable
static bool isRematerializable(const LiveInterval &LI,
const LiveIntervals &LIS,
+ VirtRegMap *VRM,
const TargetInstrInfo &TII) {
+ unsigned Reg = LI.reg;
+ unsigned Original = VRM ? VRM->getOriginal(Reg) : 0;
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -86,6 +91,36 @@ static bool isRematerializable(const LiveInterval &LI,
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
assert(MI && "Dead valno in interval");
+ // Trace copies introduced by live range splitting. The inline
+ // spiller can rematerialize through these copies, so the spill
+ // weight must reflect this.
+ if (VRM) {
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+ // If the original (pre-splitting) registers match, this
+ // copy came from a split.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ VRM->getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+ }
+ }
+
if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
return false;
}
@@ -188,7 +223,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo()))
+ if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
li.weight = normalize(totalWeight, li.getSize(), numInstr);
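The isRematerializable change above walks the full-copy chains that live range splitting introduces, giving up as soon as a copy leaves the original (pre-splitting) register. A self-contained sketch of that chase follows; the Def/DefKind model and the "LoadImm means trivially rematerializable" shortcut are illustrative assumptions standing in for LiveIntervals, VirtRegMap and MachineInstr.

#include <cassert>
#include <map>

enum class DefKind { FullCopy, LoadImm, Other };

struct Def {
  DefKind Kind;
  unsigned SrcReg;   // only meaningful for FullCopy
};

// Follow full copies back to the defining instruction, but only while the
// copies stay inside the same original (pre-splitting) register.
static bool isRematerializable(unsigned Reg,
                               const std::map<unsigned, Def> &Defs,
                               const std::map<unsigned, unsigned> &Original) {
  unsigned Orig = Original.at(Reg);
  Def D = Defs.at(Reg);
  while (D.Kind == DefKind::FullCopy) {
    unsigned Src = D.SrcReg;
    if (Original.at(Src) != Orig)
      return false;                 // copy from a different register family
    Reg = Src;
    D = Defs.at(Reg);
  }
  return D.Kind == DefKind::LoadImm; // stands in for "trivially remat"
}

int main() {
  // %2 = COPY %1, %1 = COPY %0, %0 = load-immediate; all split from %0.
  std::map<unsigned, Def> Defs{{2, {DefKind::FullCopy, 1}},
                               {1, {DefKind::FullCopy, 0}},
                               {0, {DefKind::LoadImm, 0}}};
  std::map<unsigned, unsigned> Original{{2, 0}, {1, 0}, {0, 0}};
  assert(isRematerializable(2, Defs, Original));
  return 0;
}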
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index fb29b1d..23c0d54 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
+ MaxStackArgAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
+ unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
StackOffset = SavedStackOffset;
+ MaxStackArgAlign = SavedMaxStackArgAlign;
Locs.resize(NumLocs);
}
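The CallingConvLower change adds MaxStackArgAlign and makes getRemainingRegParmsForType save and restore it alongside StackOffset. A small sketch of that save/restore pattern follows; the assumption (not shown in this diff) is that AllocateStack-style code records the largest stack-argument alignment in the new field.

#include <algorithm>
#include <cassert>

struct FrameState {
  unsigned StackOffset = 0;
  unsigned MaxStackArgAlign = 1;

  unsigned allocateStack(unsigned Size, unsigned Align) {
    StackOffset = (StackOffset + Align - 1) / Align * Align; // align up
    unsigned Offset = StackOffset;
    StackOffset += Size;
    MaxStackArgAlign = std::max(MaxStackArgAlign, Align);
    return Offset;
  }
};

int main() {
  FrameState S;
  // Speculative query: remember both fields, probe, then roll back.
  unsigned SavedOffset = S.StackOffset;
  unsigned SavedAlign = S.MaxStackArgAlign;
  S.allocateStack(/*Size=*/8, /*Align=*/16);
  S.StackOffset = SavedOffset;
  S.MaxStackArgAlign = SavedAlign;   // the new field must be restored too
  assert(S.StackOffset == 0 && S.MaxStackArgAlign == 1);
  return 0;
}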
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 155c5ec..dc13b5b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeExpandISelPseudosPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
+ initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
@@ -66,6 +67,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6ab6acc..5844124 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -63,6 +64,9 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumAndsAdded,
+ "Number of and mask instructions added to form ext loads");
+STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
@@ -109,25 +113,18 @@ static cl::opt<bool> StressExtLdPromotion(
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-struct TypeIsSExt {
- Type *Ty;
- bool IsSExt;
- TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
-};
+typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
const TargetMachine *TM;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- /// CurInstIterator - As we scan instructions optimizing them, this is the
- /// next instruction to optimize. Xforms that can invalidate this should
- /// update it.
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of non-local addresses that have been sunk into a block.
@@ -141,10 +138,10 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway.
+ /// True if CFG is modified in any way.
bool ModifiedDT;
- /// OptSize - True if optimizing for size.
+ /// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
@@ -167,30 +164,33 @@ class TypePromotionTransaction;
}
private:
- bool EliminateFallThrough(Function &F);
- bool EliminateMostlyEmptyBlocks(Function &F);
- bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
- bool OptimizeInst(Instruction *I, bool& ModifiedDT);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr,
+ bool eliminateFallThrough(Function &F);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool optimizeInst(Instruction *I, bool& ModifiedDT);
+ bool optimizeMemoryInst(Instruction *I, Value *Addr,
Type *AccessTy, unsigned AS);
- bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool MoveExtToFormExtLoad(Instruction *&I);
- bool OptimizeExtUses(Instruction *I);
- bool OptimizeSelectInst(SelectInst *SI);
- bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
- bool OptimizeExtractElementInst(Instruction *Inst);
- bool DupRetToEnableTailCallOpts(BasicBlock *BB);
- bool PlaceDbgValues(Function &F);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeLoadExt(LoadInst *I);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool optimizeSwitchInst(SwitchInst *CI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool placeDbgValues(Function &F);
bool sinkAndCmp(Function &F);
- bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ void stripInvariantGroupMetadata(Instruction &I);
};
}
@@ -218,7 +218,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
+ OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -231,12 +231,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
- EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ EverMadeChange |= eliminateMostlyEmptyBlocks(F);
// llvm.dbg.value is far away from the value then iSel may not be able
// handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F);
+ EverMadeChange |= placeDbgValues(F);
// If there is a mask, compare against zero, and branch that can be combined
// into a single target instruction, push the mask and compare into branch
@@ -251,9 +251,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
- MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -296,7 +296,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
if (EverMadeChange || MadeChange)
- MadeChange |= EliminateFallThrough(F);
+ MadeChange |= eliminateFallThrough(F);
EverMadeChange |= MadeChange;
}
@@ -314,14 +314,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
-/// EliminateFallThrough - Merge basic blocks which are connected
-/// by a single edge, where one of the basic blocks has a single successor
-/// pointing to the other basic block, which has a single predecessor.
-bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+/// Merge basic blocks which are connected by a single edge, where one of the
+/// basic blocks has a single successor pointing to the other basic block,
+/// which has a single predecessor.
+bool CodeGenPrepare::eliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
@@ -342,22 +342,21 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
// We have erased a block. Update the iterator.
- I = BB;
+ I = BB->getIterator();
}
}
return Changed;
}
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
-/// debug info directives, and an unconditional branch. Passes before isel
-/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
-/// isel. Start by eliminating these blocks so we can split them the way we
-/// want them.
-bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
+/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
+/// edges in ways that are non-optimal for isel. Start by eliminating these
+/// blocks so we can split them the way we want them.
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
@@ -366,7 +365,7 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI;
+ BasicBlock::iterator BBI = BI->getIterator();
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
@@ -383,19 +382,19 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
if (DestBB == BB)
continue;
- if (!CanMergeBlocks(BB, DestBB))
+ if (!canMergeBlocks(BB, DestBB))
continue;
- EliminateMostlyEmptyBlock(BB);
+ eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}
-/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
-/// single uncond branch between them, and BB contains no other non-phi
+/// Return true if we can merge BB into DestBB if there is a single
+/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
-bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
const BasicBlock *DestBB) const {
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there are more complex condition (e.g. preheaders),
@@ -461,9 +460,9 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
}
-/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
-/// an unconditional branch in it.
-void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+/// Eliminate a basic block that has only phi's and an unconditional branch in
+/// it.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
@@ -594,6 +593,14 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
continue;
}
+ if (RelocatedBase->getParent() != ToReplace->getParent()) {
+ // Base and derived relocates are in different basic blocks.
+ // In this case transform is only valid when base dominates derived
+ // relocate. However it would be too expensive to check dominance
+ // for each such relocate, so we skip the whole transformation.
+ continue;
+ }
+
Value *Base = ThisRelocate.getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
@@ -631,21 +638,20 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// In this case, we cannot find the bitcast any more. So we insert a new bitcast
// whether or not there is already one. In this way, we can handle all cases, and
// the extra bitcast should be optimized away in later passes.
- Instruction *ActualRelocatedBase = RelocatedBase;
+ Value *ActualRelocatedBase = RelocatedBase;
if (RelocatedBase->getType() != Base->getType()) {
ActualRelocatedBase =
- cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ Builder.CreateBitCast(RelocatedBase, Base->getType());
}
Value *Replacement = Builder.CreateGEP(
Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
- Instruction *ReplacementInst = cast<Instruction>(Replacement);
Replacement->takeName(ToReplace);
// If the newly generated derived pointer's type does not match the original derived
// pointer's type, cast the new derived pointer to match it. Same reasoning as above.
- Instruction *ActualReplacement = ReplacementInst;
- if (ReplacementInst->getType() != ToReplace->getType()) {
+ Value *ActualReplacement = Replacement;
+ if (Replacement->getType() != ToReplace->getType()) {
ActualReplacement =
- cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ Builder.CreateBitCast(Replacement, ToReplace->getType());
}
ToReplace->replaceAllUsesWith(ActualReplacement);
ToReplace->eraseFromParent();
@@ -723,6 +729,12 @@ static bool SinkCast(CastInst *CI) {
// Preincrement use iterator so we don't invalidate it.
++UI;
+ // If the block selected to receive the cast is an EH pad that does not
+ // allow non-PHI instructions before the terminator, we can't sink the
+ // cast.
+ if (UserBB->getTerminator()->isEHPad())
+ continue;
+
// If this user is in the same block as the cast, don't change the cast.
if (UserBB == DefBB) continue;
@@ -731,9 +743,9 @@ static bool SinkCast(CastInst *CI) {
if (!InsertedCast) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCast =
- CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
- InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
+ CI->getType(), "", &*InsertPt);
}
// Replace a use of the cast with a use of the new cast.
@@ -751,10 +763,9 @@ static bool SinkCast(CastInst *CI) {
return MadeChange;
}
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
+/// If the specified cast instruction is a noop copy (e.g. it's casting from
+/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
+/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
@@ -789,8 +800,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
-/// CombineUAddWithOverflow - try to combine CI into a call to the
-/// llvm.uadd.with.overflow intrinsic if possible.
+/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
+/// possible.
///
/// Return true if any changes were made.
static bool CombineUAddWithOverflow(CmpInst *CI) {
@@ -818,7 +829,7 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
assert(*AddI->user_begin() == CI && "expected!");
#endif
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
@@ -836,16 +847,16 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
return true;
}
-/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
-/// the number of virtual registers that must be created and coalesced. This is
-/// a clear win except on targets with multiple condition code registers
-/// (PowerPC), where it might lose; some adjustment may be wanted there.
+/// Sink the given CmpInst into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced. This is a clear win except on
+/// targets with multiple condition code registers (PowerPC), where it might
+/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
- /// InsertedCmp - Only insert a cmp in each block once.
+ /// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
@@ -872,10 +883,10 @@ static bool SinkCmpExpression(CmpInst *CI) {
if (!InsertedCmp) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
InsertedCmp =
- CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(), CI->getOperand(0),
- CI->getOperand(1), "", InsertPt);
+ CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
+ CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
}
// Replace a use of the cmp with a use of the new cmp.
@@ -903,8 +914,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
return false;
}
-/// isExtractBitsCandidateUse - Check if the candidates could
-/// be combined with shift instruction, which includes:
+/// Check if the candidates could be combined with a shift instruction, which
+/// includes:
/// 1. Truncate instruction
/// 2. And instruction and the imm is a mask of the low bits:
/// imm & (imm+1) == 0
@@ -922,8 +933,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) {
return true;
}
-/// SinkShiftAndTruncate - sink both shift and truncate instruction
-/// to the use of truncate's BB.
+/// Sink both shift and truncate instruction to the use of truncate's BB.
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
@@ -970,20 +980,22 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
if (!InsertedShift && !InsertedTrunc) {
BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ assert(InsertPt != TruncUserBB->end());
// Sink the shift
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift =
- BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
else
- InsertedShift =
- BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
TruncInsertPt++;
+ assert(TruncInsertPt != TruncUserBB->end());
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
- TruncI->getType(), "", TruncInsertPt);
+ TruncI->getType(), "", &*TruncInsertPt);
MadeChange = true;
@@ -993,10 +1005,10 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
return MadeChange;
}
-/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
-/// the uses could potentially be combined with this shift instruction and
-/// generate BitExtract instruction. It will only be applied if the architecture
-/// supports BitExtract instruction. Here is an example:
+/// Sink the shift *right* instruction into user blocks if the uses could
+/// potentially be combined with this shift instruction and generate BitExtract
+/// instruction. It will only be applied if the architecture supports BitExtract
+/// instruction. Here is an example:
/// BB1:
/// %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
@@ -1067,13 +1079,14 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
if (!InsertedShift) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift =
- BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
else
- InsertedShift =
- BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
MadeChange = true;
}
@@ -1089,10 +1102,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-// ScalarizeMaskedLoad() translates masked load intrinsic, like
+// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, whith loading element one-by-one if
+// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %1 = bitcast i8* %addr to i32*
@@ -1126,35 +1139,68 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
//
static void ScalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- Type *EltTy = VecType->getElementType();
+ Value *Src0 = CI->getArgOperand(3);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
assert(VecType && "Unexpected return type of masked load intrinsic");
+ Type *EltTy = CI->getType()->getVectorElementType();
+
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
BasicBlock *CondBlock = nullptr;
BasicBlock *PrevIfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
+ Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
Value *UndefVal = UndefValue::get(VecType);
// The result vector
Value *VResult = UndefVal;
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
PHINode *Phi = nullptr;
Value *PrevPhi = UndefVal;
- unsigned VectorWidth = VecType->getNumElements();
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
@@ -1182,16 +1228,17 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst* Load = Builder.CreateLoad(Gep, false);
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -1208,7 +1255,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
CI->eraseFromParent();
}
-// ScalarizeMaskedStore() translates masked store intrinsic, like
+// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
// <16 x i1> %mask)
// to a chain of basic blocks that store elements one-by-one if
@@ -1237,34 +1284,61 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// br label %else2
// . . .
static void ScalarizeMaskedStore(CallInst *CI) {
- Value *Ptr = CI->getArgOperand(1);
Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
Value *Mask = CI->getArgOperand(3);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- Type *EltTy = VecType->getElementType();
-
assert(VecType && "Unexpected data type in masked store intrinsic");
+ Type *EltTy = VecType->getElementType();
+
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
-
unsigned VectorWidth = VecType->getNumElements();
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.store, label %else
+ // br i1 %to_store, label %cond.store, label %else
//
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
@@ -1276,13 +1350,146 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// %EltAddr = getelementptr i32* %1, i32 0
// %store i32 %OneElt, i32* %EltAddr
//
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
Builder.SetInsertPoint(InsertPt);
-
+
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateStore(OneElt, Gep);
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
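For readers unfamiliar with the masked load/store semantics being scalarized above, a plain scalar-level equivalent may help. The pass itself emits per-element conditional blocks (or a straight-line sequence when the mask is a constant vector), but the observable effect is roughly the following standalone sketch.

#include <cassert>
#include <cstddef>

void maskedLoad(const int *Ptr, const bool *Mask, const int *PassThru,
                int *Result, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = Mask[I] ? Ptr[I] : PassThru[I]; // inactive lanes keep PassThru
}

void maskedStore(const int *Src, int *Ptr, const bool *Mask, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      Ptr[I] = Src[I];                          // inactive lanes are not written
}

int main() {
  int Mem[4] = {1, 2, 3, 4};
  bool Mask[4] = {true, false, true, false};
  int Pass[4] = {9, 9, 9, 9};
  int Res[4];
  maskedLoad(Mem, Mask, Pass, Res, 4);
  assert(Res[0] == 1 && Res[1] == 9 && Res[2] == 3 && Res[3] == 9);
  int Src[4] = {7, 7, 7, 7};
  maskedStore(Src, Mem, Mask, 4);
  assert(Mem[0] == 7 && Mem[1] == 2 && Mem[2] == 7 && Mem[3] == 4);
  return 0;
}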
+// Translate a masked gather intrinsic like
+// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, with loading element one-by-one if
+// the appropriate mask bit is set
+//
+// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> %Mask, i32 0
+// % ToLoad0 = icmp eq i1 % Mask0, true
+// br i1 % ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// % Load0 = load i32, i32* % Ptr0, align 4
+// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
+// % Mask1 = extractelement <16 x i1> %Mask, i32 1
+// % ToLoad1 = icmp eq i1 % Mask1, true
+// br i1 % ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// % Load1 = load i32, i32* % Ptr1, align 4
+// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// br label %else2
+// . . .
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void ScalarizeMaskedGather(CallInst *CI) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+
+ assert(VecType && "Unexpected return type of masked gather intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Shorten the way if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %ToLoad1 = icmp eq i1 %Mask1, true
+ // br i1 %ToLoad1, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToLoad" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -1290,12 +1497,204 @@ static void ScalarizeMaskedStore(CallInst *CI) {
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
}
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
}
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks, that stores element one-by-one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+// . . .
+static void ScalarizeMaskedScatter(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ assert(isa<VectorType>(Src->getType()) &&
+ "Unexpected data type in masked scatter intrinsic");
+ assert(isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+ // Shorten the way if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
+ // % ToStore = icmp eq i1 % Mask1, true
+ // br i1 % ToStore, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp =
+ Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToStore" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // % Elt1 = extractelement <16 x i32> %Src, i32 1
+ // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 % Elt1, i32* % Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
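The gather/scatter forms handled above differ from masked load/store only in that every lane carries its own pointer. The scalar-level sketch below is again just an illustration of what the emitted block chain computes, not the pass's code.

#include <cassert>
#include <cstddef>

void maskedGather(int *const *Ptrs, const bool *Mask, const int *PassThru,
                  int *Result, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = Mask[I] ? *Ptrs[I] : PassThru[I]; // each lane has its own pointer
}

void maskedScatter(const int *Src, int *const *Ptrs, const bool *Mask,
                   std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      *Ptrs[I] = Src[I];                          // only active lanes store
}

int main() {
  int A = 1, B = 2, C = 3;
  int *Ptrs[3] = {&A, &B, &C};
  bool Mask[3] = {true, false, true};
  int Pass[3] = {0, 0, 0};
  int Res[3];
  maskedGather(Ptrs, Mask, Pass, Res, 3);
  assert(Res[0] == 1 && Res[1] == 0 && Res[2] == 3);
  int Src[3] = {10, 20, 30};
  maskedScatter(Src, Ptrs, Mask, 3);
  assert(A == 10 && B == 2 && C == 30);
  return 0;
}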
+/// If counting leading or trailing zeros is an expensive operation and a zero
+/// input is defined, add a check for zero to avoid calling the intrinsic.
+///
+/// We want to transform:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+///
+/// into:
+/// entry:
+/// %cmpz = icmp eq i64 %A, 0
+/// br i1 %cmpz, label %cond.end, label %cond.false
+/// cond.false:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+/// br label %cond.end
+/// cond.end:
+/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+///
+/// If the transform is performed, return true and set ModifiedDT to true.
+static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ const TargetLowering *TLI,
+ const DataLayout *DL,
+ bool &ModifiedDT) {
+ if (!TLI || !DL)
+ return false;
+
+ // If a zero input is undefined, it doesn't make sense to despeculate that.
+ if (match(CountZeros->getOperand(1), m_One()))
+ return false;
+
+ // If it's cheap to speculate, there's nothing to do.
+ auto IntrinsicID = CountZeros->getIntrinsicID();
+ if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
+ (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
+ return false;
+
+ // Only handle legal scalar cases. Anything else requires too much work.
+ Type *Ty = CountZeros->getType();
+ unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize())
+ return false;
+
+ // The intrinsic will be sunk behind a compare against zero and branch.
+ BasicBlock *StartBlock = CountZeros->getParent();
+ BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+
+ // Create another block after the count zero intrinsic. A PHI will be added
+ // in this block to select the result of the intrinsic or the bit-width
+ // constant if the input to the intrinsic is zero.
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+ BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+
+ // Set up a builder to create a compare, conditional branch, and PHI.
+ IRBuilder<> Builder(CountZeros->getContext());
+ Builder.SetInsertPoint(StartBlock->getTerminator());
+ Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
+
+ // Replace the unconditional branch that was created by the first split with
+ // a compare against zero and a conditional branch.
+ Value *Zero = Constant::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Create a PHI in the end block to select either the output of the intrinsic
+ // or the bit width of the operand.
+ Builder.SetInsertPoint(&EndBlock->front());
+ PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
+ CountZeros->replaceAllUsesWith(PN);
+ Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
+ PN->addIncoming(BitWidth, StartBlock);
+ PN->addIncoming(CountZeros, CallBlock);
+
+ // We are explicitly handling the zero case, so we can set the intrinsic's
+ // undefined zero argument to 'true'. This will also prevent reprocessing the
+ // intrinsic; we only despeculate when a zero input is defined.
+ CountZeros->setArgOperand(1, Builder.getTrue());
+ ModifiedDT = true;
+ return true;
+}
+
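despeculateCountZeros rewrites an expensive cttz/ctlz into an explicit zero test plus a call whose zero-is-undef flag is set to true, as the doc comment above shows in IR. A source-level analogue for a 64-bit cttz, using the GCC/Clang builtin purely as an illustration of the guarded form:

#include <cassert>
#include <cstdint>

static uint64_t cttz64(uint64_t X) {
  if (X == 0)
    return 64;                         // the "cond.end" PHI's constant incoming value
  // The zero case is handled above, so the zero-is-undef variant is safe here.
  return static_cast<uint64_t>(__builtin_ctzll(X));
}

int main() {
  assert(cttz64(0) == 64);
  assert(cttz64(8) == 3);
  return 0;
}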
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -1311,7 +1710,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
// Sink address computing for memory operands into the block.
- if (OptimizeInlineAsmInst(CI))
+ if (optimizeInlineAsmInst(CI))
return true;
}
@@ -1372,14 +1771,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
- WeakVH IterHandle(CurInstIterator);
+ WeakVH IterHandle(&*CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
- if (IterHandle != CurInstIterator) {
+ if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
CurInstIterator = BB->begin();
SunkAddrs.clear();
}
@@ -1387,7 +1786,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
+ if (!TTI->isLegalMaskedLoad(CI->getType())) {
ScalarizeMaskedLoad(CI);
ModifiedDT = true;
return true;
@@ -1395,13 +1794,29 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return false;
}
case Intrinsic::masked_store: {
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
ScalarizeMaskedStore(CI);
ModifiedDT = true;
return true;
}
return false;
}
+ case Intrinsic::masked_gather: {
+ if (!TTI->isLegalMaskedGather(CI->getType())) {
+ ScalarizeMaskedGather(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_scatter: {
+ if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
+ ScalarizeMaskedScatter(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
@@ -1415,6 +1830,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
+ case Intrinsic::invariant_group_barrier:
+ II->replaceAllUsesWith(II->getArgOperand(0));
+ II->eraseFromParent();
+ return true;
+
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ // If counting zeros is expensive, try to avoid it.
+ return despeculateCountZeros(II, TLI, DL, ModifiedDT);
}
if (TLI) {
@@ -1426,7 +1850,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
Type *AccessTy;
if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
return true;
}
}
@@ -1447,9 +1871,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return false;
}
-/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
-/// instructions to the predecessor to enable tail call optimizations. The
-/// case it is currently looking for is:
+/// Look for opportunities to duplicate return instructions to the predecessor
+/// to enable tail call optimizations. The case it is currently looking for is:
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
@@ -1478,7 +1901,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
@@ -1597,7 +2020,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
namespace {
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg;
@@ -1709,10 +2132,10 @@ class TypePromotionTransaction {
public:
/// \brief Record the position of \p Inst.
InsertionHandler(Instruction *Inst) {
- BasicBlock::iterator It = Inst;
+ BasicBlock::iterator It = Inst->getIterator();
HasPrevInstruction = (It != (Inst->getParent()->begin()));
if (HasPrevInstruction)
- Point.PrevInst = --It;
+ Point.PrevInst = &*--It;
else
Point.BB = Inst->getParent();
}
@@ -1724,7 +2147,7 @@ class TypePromotionTransaction {
Inst->removeFromParent();
Inst->insertAfter(Point.PrevInst);
} else {
- Instruction *Position = Point.BB->getFirstInsertionPt();
+ Instruction *Position = &*Point.BB->getFirstInsertionPt();
if (Inst->getParent())
Inst->moveBefore(Position);
else
@@ -1797,7 +2220,7 @@ class TypePromotionTransaction {
Value *Val = Inst->getOperand(It);
OriginalValues.push_back(Val);
// Set a dummy one.
- // We could use OperandSetter here, but that would implied an overhead
+ // We could use OperandSetter here, but that would imply an overhead
// that we are not willing to pay.
Inst->setOperand(It, UndefValue::get(Val->getType()));
}
@@ -2111,7 +2534,7 @@ class AddressingModeMatcher {
unsigned AddrSpace;
Instruction *MemoryInst;
- /// AddrMode - This is the addressing mode that we're building up. This is
+ /// This is the addressing mode that we're building up. This is
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
@@ -2122,9 +2545,8 @@ class AddressingModeMatcher {
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
+ /// This is set to true when we should not do profitability checks.
+ /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
@@ -2143,7 +2565,7 @@ class AddressingModeMatcher {
}
public:
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// Find the maximal addressing mode that a load/store of V can fold,
/// given an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
/// \p InsertedInsts The instructions inserted by other CodeGenPrepare
@@ -2161,32 +2583,32 @@ public:
bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT).MatchAddr(V, 0);
+ PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool matchAddr(Value *V, unsigned Depth);
+ bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
bool *MovedAway = nullptr);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ bool isProfitableToFoldIntoAddressingMode(Instruction *I,
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
+ bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
unsigned Depth) {
// If Scale is 1, then this is the same as adding ScaleReg to the addressing
// mode. Just process that directly.
if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
+ return matchAddr(ScaleReg, Depth);
// If the scale is 0, it takes nothing to add this.
if (Scale == 0)
@@ -2233,9 +2655,9 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
return true;
}
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// This is a little filter, which returns true if an addressing computation
+/// involving I might be folded into a load/store accessing it.
+/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
switch (I->getOpcode()) {
@@ -2301,9 +2723,7 @@ class TypePromotionHelper {
/// \brief Utility function to determine if \p OpIdx should be promoted when
/// promoting \p Inst.
static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
- if (isa<SelectInst>(Inst) && OpIdx == 0)
- return false;
- return true;
+ return !(isa<SelectInst>(Inst) && OpIdx == 0);
}
/// \brief Utility function to promote the operand of \p Ext when this
@@ -2413,8 +2833,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
Value *OpndVal = Inst->getOperand(0);
// Check if we can use this operand in the extension.
- // If the type is larger than the result type of the extension,
- // we cannot.
+ // If the type is larger than the result type of the extension, we cannot.
if (!OpndVal->getType()->isIntegerTy() ||
OpndVal->getType()->getIntegerBitWidth() >
ConsideredExtType->getIntegerBitWidth())
@@ -2433,18 +2852,16 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// #1 get the type of the operand and check the kind of the extended bits.
const Type *OpndType;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
- OpndType = It->second.Ty;
+ if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
+ OpndType = It->second.getPointer();
else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
OpndType = Opnd->getOperand(0)->getType();
else
return false;
- // #2 check that the truncate just drop extended bits.
- if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
- return true;
-
- return false;
+ // #2 check that the truncate just drops extended bits.
+ return Inst->getType()->getIntegerBitWidth() >=
+ OpndType->getIntegerBitWidth();
}
TypePromotionHelper::Action TypePromotionHelper::getAction(
@@ -2553,7 +2970,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
- // Restore the operand of Ext (which has been replace by the previous call
+ // Restore the operand of Ext (which has been replaced by the previous call
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
TPT.setOperand(Ext, 0, ExtOpnd);
}
@@ -2631,8 +3048,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
return ExtOpnd;
}
-/// IsPromotionProfitable - Check whether or not promoting an instruction
-/// to a wider type was profitable.
+/// Check whether or not promoting an instruction to a wider type is profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
@@ -2640,7 +3056,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// matched in the addressing mode during the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool AddressingModeMatcher::IsPromotionProfitable(
+bool AddressingModeMatcher::isPromotionProfitable(
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
// The cost of the new extensions is greater than the cost of the
@@ -2656,9 +3072,9 @@ bool AddressingModeMatcher::IsPromotionProfitable(
return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
+/// Given an instruction or constant expr, see if we can fold the operation
+/// into the addressing mode. If so, update the addressing mode and return
+/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
@@ -2667,7 +3083,7 @@ bool AddressingModeMatcher::IsPromotionProfitable(
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned Depth,
bool *MovedAway) {
// Avoid exponential behavior on extremely deep expression trees.
@@ -2680,13 +3096,13 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
switch (Opcode) {
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
case Instruction::IntToPtr: {
auto AS = AddrInst->getType()->getPointerAddressSpace();
auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
case Instruction::BitCast:
@@ -2698,14 +3114,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// and we don't want to mess around with them. Assume it knows what it
// is doing.
AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::AddrSpaceCast: {
unsigned SrcAS
= AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
case Instruction::Add: {
@@ -2719,8 +3135,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
+ matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
// Restore the old addr mode info.
@@ -2729,8 +3145,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TPT.rollback(LastKnownGood);
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
+ matchAddr(AddrInst->getOperand(1), Depth+1))
return true;
// Otherwise we definitely can't merge the ADD in.
@@ -2752,7 +3168,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
case Instruction::GetElementPtr: {
// Scan the GEP. We check it if it contains constant offsets and at most
@@ -2791,7 +3207,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
}
AddrMode.BaseOffs -= ConstantOffset;
@@ -2806,7 +3222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrMode.BaseOffs += ConstantOffset;
// Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
// If it couldn't be matched, just stuff the value in a register.
if (AddrMode.HasBaseReg) {
AddrMode = BackupAddrMode;
@@ -2818,7 +3234,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
}
// Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
Depth)) {
// If it couldn't be matched, try stuffing the base into a register
// instead of matching it, and retrying the match of the scale.
@@ -2829,7 +3245,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = AddrInst->getOperand(0);
AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
VariableScale, Depth)) {
// If even that didn't work, bail.
AddrMode = BackupAddrMode;
@@ -2879,12 +3295,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
- if (!MatchAddr(PromotedOperand, Depth) ||
- // The total of the new cost is equals to the cost of the created
+ if (!matchAddr(PromotedOperand, Depth) ||
+ // The total of the new cost is equal to the cost of the created
// instructions.
- // The total of the old cost is equals to the cost of the extension plus
+ // The total of the old cost is equal to the cost of the extension plus
// what we have saved in the addressing mode.
- !IsPromotionProfitable(CreatedInstsCost,
+ !isPromotionProfitable(CreatedInstsCost,
ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
@@ -2899,12 +3315,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return false;
}
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
+/// If we can, try to add the value of 'Addr' into the current addressing mode.
+/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
+/// unmodified. This assumes that Addr is either a pointer type or intptr_t
+/// for the target.
///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
// Start a transaction at this point that we will rollback if the matching
// fails.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
@@ -2929,8 +3345,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
// Check to see if it is possible to fold this operation.
bool MovedAway = false;
- if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
- // This instruction may have been move away. If so, there is nothing
+ if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ // This instruction may have been moved away. If so, there is nothing
// to check here.
if (MovedAway)
return true;
@@ -2938,7 +3354,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
// *profitable* to do so. We use a simple cost model to avoid increasing
// register pressure too much.
if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
AddrModeInsts.push_back(I);
return true;
}
@@ -2950,7 +3366,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
TPT.rollback(LastKnownGood);
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ if (matchOperationAddr(CE, CE->getOpcode(), Depth))
return true;
TPT.rollback(LastKnownGood);
} else if (isa<ConstantPointerNull>(Addr)) {
@@ -2983,9 +3399,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
return false;
}
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
+/// Check to see if all uses of OpVal by the specified inline asm call are due
+/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetMachine &TM) {
const Function *F = CI->getParent()->getParent();
@@ -3011,8 +3426,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Recursively walk all the uses of I until we find a memory use.
+/// If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(
Instruction *I,
@@ -3059,11 +3474,11 @@ static bool FindAllMemoryUses(
return false;
}
-/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+/// Return true if Val is already known to be live at the use site that we're
+/// folding it into. If so, there is no cost to include it in the addressing
+/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
+/// instruction already.
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
@@ -3085,11 +3500,11 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
+/// It is possible for the addressing mode of the machine to fold the specified
+/// instruction into a load or store that ultimately uses it.
+/// However, the specified instruction has multiple uses.
+/// Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
@@ -3107,7 +3522,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
@@ -3124,9 +3539,9 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
BaseReg = nullptr;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
ScaledReg = nullptr;
// If folding this instruction (and its subexprs) didn't extend any live
@@ -3171,7 +3586,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
+ bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
// The match was to check the profitability, the changes made are not
@@ -3192,7 +3607,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
} // end anonymous namespace
-/// IsNonLocalValue - Return true if the specified values are defined in a
+/// Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -3200,16 +3615,15 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
-/// OptimizeMemoryInst - Load and Store Instructions often have
-/// addressing modes that can do significant amounts of computation. As such,
-/// instruction selection will try to get the load or store to do as much
-/// computation as possible for the program. The problem is that isel can only
-/// see within a single block. As such, we sink as much legal addressing mode
-/// stuff into the block as possible.
+/// Load and Store Instructions often have addressing modes that can do
+/// significant amounts of computation. As such, instruction selection will try
+/// to get the load or store to do as much computation as possible for the
+/// program. The problem is that isel can only see within a single block. As
+/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
-bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
@@ -3530,12 +3944,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Repl->use_empty()) {
// This can cause recursive deletion, which can invalidate our iterator.
// Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(CurInstIterator);
+ WeakVH IterHandle(&*CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
- if (IterHandle != CurInstIterator) {
+ if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
CurInstIterator = BB->begin();
@@ -3546,10 +3960,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return true;
}
-/// OptimizeInlineAsmInst - If there are any memory operands, use
-/// OptimizeMemoryInst to sink their address computing into the block when
-/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+/// If there are any memory operands, use OptimizeMemoryInst to sink their
+/// address computing into the block when possible / profitable.
+bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
const TargetRegisterInfo *TRI =
@@ -3566,7 +3979,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+ MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
@@ -3646,7 +4059,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstsCost = 0) {
@@ -3696,7 +4109,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
@@ -3713,13 +4126,13 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
return false;
}
-/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
-/// basic block as the load, unless conditions are unfavorable. This allows
-/// SelectionDAG to fold the extend into the load.
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
/// \p I[in/out] the extension may be modified during the process if some
/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// Try to promote a chain of computation if it allows forming
// an extended load.
TypePromotionTransaction TPT;
@@ -3730,7 +4143,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
// Look for a load being extended.
LoadInst *LI = nullptr;
Instruction *OldExt = I;
- bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
if (!LI || !I) {
assert(!HasPromoted && !LI && "If we did not match any load instruction "
"the code must remain the same");
@@ -3780,7 +4193,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
return true;
}
-bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If the result of a {s|z}ext and its source are both live out, rewrite all
@@ -3838,7 +4251,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
InsertedInsts.insert(InsertedTrunc);
}
@@ -3851,9 +4265,202 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
return MadeChange;
}
-/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
-/// turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+// Find loads whose uses only use some of the loaded value's bits. Add an "and"
+// just after the load if the target can fold this into one extload instruction,
+// with the hope of eliminating some of the other later "and" instructions using
+// the loaded value. "and"s that are made trivially redundant by the insertion
+// of the new "and" are removed by this function, while others (e.g. those whose
+// path from the load goes through a phi) are left for isel to potentially
+// remove.
+//
+// For example:
+//
+// b0:
+// x = load i32
+// ...
+// b1:
+// y = and x, 0xff
+// z = use y
+//
+// becomes:
+//
+// b0:
+// x = load i32
+// x' = and x, 0xff
+// ...
+// b1:
+// z = use x'
+//
+// whereas:
+//
+// b0:
+// x1 = load i32
+// ...
+// b1:
+// x2 = load i32
+// ...
+// b2:
+// x = phi x1, x2
+// y = and x, 0xff
+//
+// becomes (after a call to optimizeLoadExt for each load):
+//
+// b0:
+// x1 = load i32
+// x1' = and x1, 0xff
+// ...
+// b1:
+// x2 = load i32
+// x2' = and x2, 0xff
+// ...
+// b2:
+// x = phi x1', x2'
+// y = and x, 0xff
+//
+
+bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
+
+ if (!Load->isSimple() ||
+ !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
+ return false;
+
+ // Skip loads we've already transformed or have no reason to transform.
+ if (Load->hasOneUse()) {
+ User *LoadUser = *Load->user_begin();
+ if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
+ !dyn_cast<PHINode>(LoadUser))
+ return false;
+ }
+
+ // Look at all uses of Load, looking through phis, to determine how many bits
+ // of the loaded value are needed.
+ SmallVector<Instruction *, 8> WorkList;
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 8> AndsToMaybeRemove;
+ for (auto *U : Load->users())
+ WorkList.push_back(cast<Instruction>(U));
+
+ EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
+ unsigned BitWidth = LoadResultVT.getSizeInBits();
+ APInt DemandBits(BitWidth, 0);
+ APInt WidestAndBits(BitWidth, 0);
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(I).second)
+ continue;
+
+ // For a PHI node, push all of its users.
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ for (auto *U : Phi->users())
+ WorkList.push_back(cast<Instruction>(U));
+ continue;
+ }
+
+ switch (I->getOpcode()) {
+ case llvm::Instruction::And: {
+ auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!AndC)
+ return false;
+ APInt AndBits = AndC->getValue();
+ DemandBits |= AndBits;
+ // Keep track of the widest and mask we see.
+ if (AndBits.ugt(WidestAndBits))
+ WidestAndBits = AndBits;
+ if (AndBits == WidestAndBits && I->getOperand(0) == Load)
+ AndsToMaybeRemove.push_back(I);
+ break;
+ }
+
+ case llvm::Instruction::Shl: {
+ auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!ShlC)
+ return false;
+ uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
+ auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
+ DemandBits |= ShlDemandBits;
+ break;
+ }
+
+ case llvm::Instruction::Trunc: {
+ EVT TruncVT = TLI->getValueType(*DL, I->getType());
+ unsigned TruncBitWidth = TruncVT.getSizeInBits();
+ auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
+ DemandBits |= TruncBits;
+ break;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ uint32_t ActiveBits = DemandBits.getActiveBits();
+ // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
+ // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
+ // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
+ // (and (load x) 1) is not matched as a single instruction, rather as a LDR
+ // followed by an AND.
+ // TODO: Look into removing this restriction by fixing backends to either
+ // return false for isLoadExtLegal for i1 or have them select this pattern to
+ // a single instruction.
+ //
+ // Also avoid hoisting if we didn't see any ands with the exact DemandBits
+ // mask, since these are the only ands that will be removed by isel.
+ if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ WidestAndBits != DemandBits)
+ return false;
+
+ LLVMContext &Ctx = Load->getType()->getContext();
+ Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
+ EVT TruncVT = TLI->getValueType(*DL, TruncTy);
+
+ // Reject cases that won't be matched as extloads.
+ if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
+ !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
+ return false;
+
+ IRBuilder<> Builder(Load->getNextNode());
+ auto *NewAnd = dyn_cast<Instruction>(
+ Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+
+ // Replace all uses of load with new and (except for the use of load in the
+ // new and itself).
+ Load->replaceAllUsesWith(NewAnd);
+ NewAnd->setOperand(0, Load);
+
+ // Remove any and instructions that are now redundant.
+ for (auto *And : AndsToMaybeRemove)
+ // Check that the and mask is the same as the one we decided to put on the
+ // new and.
+ if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
+ And->replaceAllUsesWith(NewAnd);
+ if (&*CurInstIterator == And)
+ CurInstIterator = std::next(And->getIterator());
+ And->eraseFromParent();
+ ++NumAndUses;
+ }
+
+ ++NumAndsAdded;
+ return true;
+}
+
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // If it's safe to speculatively execute, then it should not have side
+ // effects; therefore, it's safe to sink and possibly *not* execute.
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
+ TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
+}
+
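For orientation, a minimal sketch of the kind of operand sinkSelectOperand is meant to catch (illustrative only, not part of the patch; it assumes the target's TTI reports the floating-point divide as TCC_Expensive or costlier):

    ; %div is speculatable, has a single use, and is expensive on the assumed
    ; target, so it is a candidate to be sunk into a conditional block:
    ;   %div = fdiv double %x, %y
    ;   %sel = select i1 %cmp, double %div, double %z
    ; a cheap operand such as an add would be left where it is.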
+/// Returns true if a SelectInst should be turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ SelectInst *SI) {
// FIXME: This should use the same heuristics as IfConversion to determine
// whether a select is better represented as a branch. This requires that
// branch probability metadata is preserved for the select, which is not the
@@ -3861,28 +4468,36 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
- // If the branch is predicted right, an out of order CPU can avoid blocking on
- // the compare. Emit cmovs on compares with a memory operand as branches to
- // avoid stalls on the load from memory. If the compare has more than one use
- // there's probably another cmov or setcc around so it's not worth emitting a
- // branch.
- if (!Cmp)
+ // If a branch is predictable, an out-of-order CPU can avoid blocking on its
+ // comparison condition. If the compare has more than one use, there's
+ // probably another cmov or setcc around, so it's not worth emitting a branch.
+ if (!Cmp || !Cmp->hasOneUse())
return false;
Value *CmpOp0 = Cmp->getOperand(0);
Value *CmpOp1 = Cmp->getOperand(1);
- // We check that the memory operand has one use to avoid uses of the loaded
- // value directly after the compare, making branches unprofitable.
- return Cmp->hasOneUse() &&
- ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+ // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
+ // on a load from memory. But if the load is used more than once, do not
+ // change the select to a branch because the load is probably needed
+ // regardless of whether the branch is taken or not.
+ if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
+ return true;
+
+ // If either operand of the select is expensive and only needed on one side
+ // of the select, we should form a branch.
+ if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+ sinkSelectOperand(TTI, SI->getFalseValue()))
+ return true;
+
+ return false;
}
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
-bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
@@ -3902,34 +4517,97 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
// We have efficient codegen support for the select instruction.
// Check if it is profitable to keep this 'select'.
if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(SI))
+ !isFormingBranchFromSelectProfitable(TTI, SI))
return false;
}
ModifiedDT = true;
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // br i1 %cmp, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // In addition, we may sink instructions that produce %c or %d from
+ // the entry block into the destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
// First, we split the block containing the select into 2 blocks.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
- BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- // Create a new block serving as the landing pad for the branch.
- BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
- NextBlock->getParent(), NextBlock);
-
- // Move the unconditional branch from the block with the select in it into our
- // landing pad block.
+ // Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(NextBlock, SmallBlock);
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr;
+ BasicBlock *FalseBlock = nullptr;
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
+
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ BranchInst::Create(EndBlock, FalseBlock);
+ }
// Insert the real conditional branch based on the original condition.
- BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ if (TrueBlock == nullptr) {
+ BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ FalseBlock = StartBlock;
+ } else {
+ BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ }
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), StartBlock);
- PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ PN->addIncoming(SI->getTrueValue(), TrueBlock);
+ PN->addIncoming(SI->getFalseValue(), FalseBlock);
+
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
@@ -3955,7 +4633,7 @@ static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
/// it's often worth sinking a shufflevector splat down to its use so that
/// codegen can spot all lanes are identical.
-bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
BasicBlock *DefBB = SVI->getParent();
// Only do this xform if variable vector shifts are particularly expensive.
@@ -3987,9 +4665,10 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
if (!InsertedShuffle) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
- SVI->getOperand(1),
- SVI->getOperand(2), "", InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedShuffle =
+ new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
+ SVI->getOperand(2), "", &*InsertPt);
}
UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -4005,6 +4684,49 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ if (!TLI || !DL)
+ return false;
+
+ Value *Cond = SI->getCondition();
+ Type *OldType = Cond->getType();
+ LLVMContext &Context = Cond->getContext();
+ MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+ unsigned RegWidth = RegType.getSizeInBits();
+
+ if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+ return false;
+
+ // If the register width is greater than the type width, expand the condition
+ // of the switch instruction and each case constant to the width of the
+ // register. By widening the type of the switch condition, subsequent
+ // comparisons (for case comparisons) will not need to be extended to the
+ // preferred register width, so we will potentially eliminate N-1 extends,
+ // where N is the number of cases in the switch.
+ auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+ // Zero-extend the switch condition and case constants unless the switch
+ // condition is a function argument that is already being sign-extended.
+ // In that case, we can avoid an unnecessary mask/extension by sign-extending
+ // everything instead.
+ Instruction::CastOps ExtType = Instruction::ZExt;
+ if (auto *Arg = dyn_cast<Argument>(Cond))
+ if (Arg->hasSExtAttr())
+ ExtType = Instruction::SExt;
+
+ auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+ ExtInst->insertBefore(SI);
+ SI->setCondition(ExtInst);
+ for (SwitchInst::CaseIt Case : SI->cases()) {
+ APInt NarrowConst = Case.getCaseValue()->getValue();
+ APInt WideConst = (ExtType == Instruction::ZExt) ?
+ NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+ Case.setValue(ConstantInt::get(Context, WideConst));
+ }
+
+ return true;
+}
+
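To make the new optimizeSwitchInst concrete, here is a rough before/after sketch (illustrative only; the i8 condition and the 32-bit register width are assumptions about the target):

    ; before, with an i8 condition narrower than the target's registers:
    ;   switch i8 %c, label %def [ i8 1, label %a
    ;                              i8 2, label %b ]
    ; after widening the condition and the case constants:
    ;   %c.wide = zext i8 %c to i32
    ;   switch i32 %c.wide, label %def [ i32 1, label %a
    ;                                    i32 2, label %b ]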
namespace {
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
@@ -4138,7 +4860,7 @@ class VectorPromoteHelper {
/// \brief Generate a constant vector with \p Val with the same
/// number of elements as the transition.
/// \p UseSplat defines whether or not \p Val should be replicated
- /// accross the whole vector.
+ /// across the whole vector.
/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
/// otherwise we generate a vector with as many undef as possible:
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
@@ -4320,7 +5042,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
-bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
unsigned CombineCost = UINT_MAX;
if (DisableStoreExtract || !TLI ||
(!StressStoreExtract &&
@@ -4372,7 +5094,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
return false;
}
-bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
@@ -4413,8 +5135,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = MoveExtToFormExtLoad(I);
- return MadeChange | OptimizeExtUses(I);
+ bool MadeChange = moveExtToFormExtLoad(I);
+ return MadeChange | optimizeExtUses(I);
}
}
return false;
@@ -4425,17 +5147,21 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ stripInvariantGroupMetadata(*LI);
if (TLI) {
+ bool Modified = optimizeLoadExt(LI);
unsigned AS = LI->getPointerAddressSpace();
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ return Modified;
}
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ stripInvariantGroupMetadata(*SI);
if (TLI) {
unsigned AS = SI->getPointerAddressSpace();
- return OptimizeMemoryInst(I, SI->getOperand(1),
+ return optimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType(), AS);
}
return false;
@@ -4460,23 +5186,26 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
- OptimizeInst(NC, ModifiedDT);
+ optimizeInst(NC, ModifiedDT);
return true;
}
return false;
}
if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI, ModifiedDT);
+ return optimizeCallInst(CI, ModifiedDT);
if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return OptimizeSelectInst(SI);
+ return optimizeSelectInst(SI);
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return OptimizeShuffleVectorInst(SVI);
+ return optimizeShuffleVectorInst(SVI);
+
+ if (auto *Switch = dyn_cast<SwitchInst>(I))
+ return optimizeSwitchInst(Switch);
if (isa<ExtractElementInst>(I))
- return OptimizeExtractElementInst(I);
+ return optimizeExtractElementInst(I);
return false;
}
@@ -4484,17 +5213,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
CurInstIterator = BB.begin();
while (CurInstIterator != BB.end()) {
- MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
if (ModifiedDT)
return true;
}
- MadeChange |= DupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB);
return MadeChange;
}
@@ -4502,12 +5231,12 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
// llvm.dbg.value is far away from the value then iSel may not be able to
// handle it properly. iSel will drop llvm.dbg.value if it cannot
// find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
Instruction *PrevNonDbgInst = nullptr;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- Instruction *Insn = BI++;
+ Instruction *Insn = &*BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
// Leave dbg.values that refer to an alloca alone. These
// intrinsics describe the address of a variable (= the alloca)
@@ -4521,10 +5250,14 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) {
Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
DVI->removeFromParent();
if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
else
DVI->insertAfter(VI);
MadeChange = true;
@@ -4548,7 +5281,7 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
return false;
bool MadeChange = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// Does this BB end with the following?
// %andVal = and %val, #single-bit-set
@@ -4671,6 +5404,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
continue;
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+ continue;
+
unsigned Opc;
Value *Cond1, *Cond2;
if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
@@ -4697,7 +5434,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
- auto *Br1 = cast<BranchInst>(BB.getTerminator());
Br1->setCondition(Cond1);
LogicOp->eraseFromParent();
@@ -4828,3 +5564,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
return MadeChange;
}
+
+void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
+ if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
+ I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
+}
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
index 28c97ba..ff7c0d5 100644
--- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
+++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
@@ -38,9 +38,9 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
- Optional<bool> isGCManagedPointer(const Value *V) const override {
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
- PointerType *PT = cast<PointerType>(V->getType());
+ const PointerType *PT = cast<PointerType>(Ty);
// We pick addrspace(1) as our GC managed heap.
return (1 == PT->getAddressSpace());
}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index dba280f..c924ba3 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -52,14 +52,13 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Clear "do not change" set.
KeepRegs.reset();
- bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
+ bool IsReturnBlock = BB->isReturnBlock();
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BBSize;
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 0a188c0..af6b6a3 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -31,10 +31,39 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+// --------------------------------------------------------------------
+// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
+
+namespace {
+ DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+ return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+ }
+
+ /// Return the DFAInput for an instruction class input vector.
+ /// This function is used in both DFAPacketizer.cpp and in
+ /// DFAPacketizerEmitter.cpp.
+ DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+ DFAInput InsnInput = 0;
+ assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
+ for (auto U : InsnClass)
+ InsnInput = addDFAFuncUnits(InsnInput, U);
+ return InsnInput;
+ }
+}
+// --------------------------------------------------------------------
+
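As a quick illustration of the packing done by the helpers above (a sketch with made-up function-unit masks; DFA_MAX_RESOURCES is whatever the generated tables define):

    // An instruction class with two stages, using func-unit masks 0x3 and 0x4:
    DFAInput Inp = 0;
    Inp = addDFAFuncUnits(Inp, 0x3); // Inp == 0x3
    Inp = addDFAFuncUnits(Inp, 0x4); // Inp == (0x3 << DFA_MAX_RESOURCES) | 0x4
    // getDFAInsnInput({0x3, 0x4}) produces the same packed value.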
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
+ const DFAStateInput (*SIT)[2],
const unsigned *SET):
InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
- DFAStateEntryTable(SET) {}
+ DFAStateEntryTable(SET) {
+ // Make sure DFA types are large enough for the number of terms & resources.
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput))
+ && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput))
+ && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
+}
//
@@ -60,26 +89,42 @@ void DFAPacketizer::ReadTable(unsigned int state) {
DFAStateInputTable[i][1];
}
+//
+// getInsnInput - Return the DFAInput for an instruction class.
+//
+DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
+ // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
+ DFAInput InsnInput = 0;
+ unsigned i = 0;
+ for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
+ *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) {
+ InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
+ assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
+ }
+ return InsnInput;
+}
+
+// getInsnInput - Return the DFAInput for an instruction class input vector.
+DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
+ return getDFAInsnInput(InsnClass);
+}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
- const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
- unsigned FuncUnits = IS->getUnits();
- UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
return (CachedTable.count(StateTrans) != 0);
}
-
// reserveResources - Reserve the resources occupied by a MCInstrDesc and
// change the current state to reflect that change.
void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
- const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
- unsigned FuncUnits = IS->getUnits();
- UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
assert(CachedTable.count(StateTrans) != 0);
CurrentState = CachedTable[StateTrans];
@@ -104,32 +149,35 @@ namespace llvm {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// Schedule method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+private:
+ AliasAnalysis *AA;
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- bool IsPostRA);
+ AliasAnalysis *AA);
// Schedule - Actual scheduling work.
void schedule() override;
};
}
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
- MachineLoopInfo &MLI, bool IsPostRA)
- : ScheduleDAGInstrs(MF, &MLI, IsPostRA) {
+ MachineLoopInfo &MLI,
+ AliasAnalysis *AA)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
CanHandleTerminators = true;
}
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
- buildSchedGraph(nullptr);
+ buildSchedGraph(AA);
}
// VLIWPacketizerList Ctor
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
- MachineLoopInfo &MLI, bool IsPostRA)
- : MF(MF) {
+ MachineLoopInfo &MLI, AliasAnalysis *AA)
+ : MF(MF), AA(AA) {
TII = MF.getSubtarget().getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA);
}
// VLIWPacketizerList Dtor
@@ -147,7 +195,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineInstr *MI) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst, MI);
+ finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
@@ -191,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
// Ask DFA if machine resource is available for MI.
bool ResourceAvail = ResourceTracker->canReserveResources(MI);
- if (ResourceAvail) {
+ if (ResourceAvail && shouldAddToPacket(MI)) {
// Dependency check for MI with instructions in CurrentPacketMIs.
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
@@ -210,7 +258,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
} // !isLegalToPacketizeTogether.
} // For all instructions in CurrentPacketMIs.
} else {
- // End the packet if resource is not available.
+ // End the packet if resource is not available, or if the instruction
+ // should not be added to the current packet.
endPacket(MBB, MI);
}
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 941129b..b11b497 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -101,26 +101,22 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
- for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
- I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
-
+ for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
- for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
- E = MBB->succ_end(); S != E; S++)
- for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
- LI != (*S)->livein_end(); LI++)
- LivePhysRegs.set(*LI);
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end(); S != E; S++)
+ for (const auto &LI : (*S)->liveins())
+ LivePhysRegs.set(LI.PhysReg);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
- MIE = MBB->rend(); MII != MIE; ) {
+ for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
+ MIE = MBB.rend(); MII != MIE; ) {
MachineInstr *MI = &*MII;
// If the instruction is dead, delete it!
@@ -132,7 +128,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
- MIE = MBB->rend();
+ MIE = MBB.rend();
// MII is now pointing to the next instruction to process,
// so don't increment it.
continue;
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index e019dfb..eae78a9 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -192,9 +192,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
if (Resumes.empty())
return false;
- // Check the personality, don't do anything if it's for MSVC.
+ // Check the personality, don't do anything if it's funclet-based.
EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
- if (isMSVCEHPersonality(Pers))
+ if (isFuncletEHPersonality(Pers))
return false;
LLVMContext &Ctx = Fn.getContext();
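Switching the check from isMSVCEHPersonality to isFuncletEHPersonality makes the early return cover every personality whose handlers are lowered as funclets, not only the MSVC personalities. A minimal sketch of the guard, with an illustrative helper name (shouldSkipResumeLowering is not part of the patch):

#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Decide whether resume-call lowering should be skipped for this function.
static bool shouldSkipResumeLowering(const Function &Fn) {
  if (!Fn.hasPersonalityFn())
    return false;
  EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
  // Funclet-based personalities are lowered by a different mechanism.
  return isFuncletEHPersonality(Pers);
}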
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index fbc4d97..f3536d7 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -538,11 +538,11 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up the CFG, temporarily leave Head without any successors.
Head->removeSuccessor(TBB);
- Head->removeSuccessor(FBB);
+ Head->removeSuccessor(FBB, true);
if (TBB != Tail)
- TBB->removeSuccessor(Tail);
+ TBB->removeSuccessor(Tail, true);
if (FBB != Tail)
- FBB->removeSuccessor(Tail);
+ FBB->removeSuccessor(Tail, true);
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 5b09cf1..c550008 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -375,9 +375,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// This is the entry block.
if (MBB->pred_empty()) {
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
- e = MBB->livein_end(); i != e; ++i) {
- for (int rx : regIndices(*i)) {
+ for (const auto &LI : MBB->liveins()) {
+ for (int rx : regIndices(LI.PhysReg)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
@@ -559,12 +558,11 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
- for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
- I != E; ++I) {
+ for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
// Update liveness, including the current instruction's defs.
- LiveRegSet.stepBackward(*I);
+ LiveRegSet.stepBackward(I);
- if (UndefMI == &*I) {
+ if (UndefMI == &I) {
if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
@@ -733,12 +731,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
- for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
- I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
+ for (unsigned Reg : *RC) {
+ if (MRI.isPhysRegUsed(Reg)) {
anyregs = true;
break;
}
+ }
if (!anyregs) return false;
// Initialize the AliasMap on the first use.
@@ -752,7 +751,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
AliasMap[*AI].push_back(i);
}
- MachineBasicBlock *Entry = MF->begin();
+ MachineBasicBlock *Entry = &*MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
SmallVector<MachineBasicBlock*, 16> Loops;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
@@ -761,22 +760,19 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
enterBasicBlock(MBB);
if (SeenUnknownBackEdge)
Loops.push_back(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I)
- visitInstr(I);
+ for (MachineInstr &MI : *MBB)
+ visitInstr(&MI);
processUndefReads(MBB);
leaveBasicBlock(MBB);
}
// Visit all the loop blocks again in order to merge DomainValues from
// back-edges.
- for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Loops[i];
+ for (MachineBasicBlock *MBB : Loops) {
enterBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I)
- if (!I->isDebugValue())
- processDefs(I, false);
+ for (MachineInstr &MI : *MBB)
+ if (!MI.isDebugValue())
+ processDefs(&MI, false);
processUndefReads(MBB);
leaveBasicBlock(MBB);
}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
index 55e809e..90ddac9 100644
--- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -50,7 +50,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
// Iterate through each instruction in the function, looking for pseudos.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE; ) {
MachineInstr *MI = MBBI++;
@@ -63,7 +63,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
// The expansion may involve new basic blocks.
if (NewMBB != MBB) {
MBB = NewMBB;
- I = NewMBB;
+ I = NewMBB->getIterator();
MBBI = NewMBB->begin();
MBBE = NewMBB->end();
}
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
new file mode 100644
index 0000000..8b2f505
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -0,0 +1,55 @@
+//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations which result in
+// funclets being contiguous.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "funclet-layout"
+
+namespace {
+class FuncletLayout : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ FuncletLayout() : MachineFunctionPass(ID) {
+ initializeFuncletLayoutPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+char FuncletLayout::ID = 0;
+char &llvm::FuncletLayoutID = FuncletLayout::ID;
+INITIALIZE_PASS(FuncletLayout, "funclet-layout",
+ "Contiguously Lay Out Funclets", false, false)
+
+bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership =
+ getFuncletMembership(F);
+ if (FuncletMembership.empty())
+ return false;
+
+ F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto FuncletX = FuncletMembership.find(&X);
+ auto FuncletY = FuncletMembership.find(&Y);
+ assert(FuncletX != FuncletMembership.end());
+ assert(FuncletY != FuncletMembership.end());
+ return FuncletX->second < FuncletY->second;
+ });
+
+ // Conservatively assume we changed something.
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index d8edd7e..484d317 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -158,7 +158,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
// Search for initializers in the initial BB.
SmallPtrSet<AllocaInst *, 16> InitedRoots;
- for (; !CouldBecomeSafePoint(IP); ++IP)
+ for (; !CouldBecomeSafePoint(&*IP); ++IP)
if (StoreInst *SI = dyn_cast<StoreInst>(IP))
if (AllocaInst *AI =
dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
@@ -320,7 +320,9 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
RI = FI->removeStackRoot(RI);
} else {
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ unsigned FrameReg; // FIXME: surely GCRoot ought to store the
+ // register that the offset is from?
+ RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
++RI;
}
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 6f9e839..dd9a840 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -108,10 +108,9 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
// FIXME: this could be a transitional option, and we probably need to remove
// it if only we are sure this optimization could always benefit all targets.
-static cl::opt<bool>
+static cl::opt<cl::boolOrDefault>
EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
- cl::desc("Enable global merge pass on external linkage"),
- cl::init(false));
+ cl::desc("Enable global merge pass on external linkage"));
STATISTIC(NumMerged, "Number of globals merged");
namespace {
@@ -129,11 +128,14 @@ namespace {
/// FIXME: This could learn about optsize, and be used in the cost model.
bool OnlyOptimizeForSize;
+ /// Whether we should merge global variables that have external linkage.
+ bool MergeExternalGlobals;
+
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
/// \brief Merge everything in \p Globals for which the corresponding bit
/// in \p GlobalSet is set.
- bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+ bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const BitVector &GlobalSet, Module &M, bool isConst,
unsigned AddrSpace) const;
@@ -158,9 +160,11 @@ namespace {
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetMachine *TM = nullptr,
unsigned MaximalOffset = 0,
- bool OnlyOptimizeForSize = false)
+ bool OnlyOptimizeForSize = false,
+ bool MergeExternalGlobals = false)
: FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
- OnlyOptimizeForSize(OnlyOptimizeForSize) {
+ OnlyOptimizeForSize(OnlyOptimizeForSize),
+ MergeExternalGlobals(MergeExternalGlobals) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -189,14 +193,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
auto &DL = M.getDataLayout();
// FIXME: Find better heuristics
- std::stable_sort(
- Globals.begin(), Globals.end(),
- [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2));
- });
+ std::stable_sort(Globals.begin(), Globals.end(),
+ [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ return DL.getTypeAllocSize(GV1->getValueType()) <
+ DL.getTypeAllocSize(GV2->getValueType());
+ });
// If we want to just blindly group all globals together, do so.
if (!GlobalMergeGroupByUse) {
@@ -207,7 +208,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// If we want to be smarter, look at all uses of each global, to try to
// discover all sets of globals used together, and how many times each of
- // these sets occured.
+ // these sets occurred.
//
// Keep this reasonably efficient, by having an append-only list of all sets
// discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
@@ -302,8 +303,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Function *ParentFn = I->getParent()->getParent();
// If we're only optimizing for size, ignore non-minsize functions.
- if (OnlyOptimizeForSize &&
- !ParentFn->hasFnAttribute(Attribute::MinSize))
+ if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
continue;
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
@@ -406,15 +406,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return Changed;
}
-bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const BitVector &GlobalSet, Module &M, bool isConst,
unsigned AddrSpace) const {
+ assert(Globals.size() > 1);
Type *Int32Ty = Type::getInt32Ty(M.getContext());
auto &DL = M.getDataLayout();
- assert(Globals.size() > 1);
-
DEBUG(dbgs() << " Trying to merge set, starts with #"
<< GlobalSet.find_first() << "\n");
@@ -425,58 +424,44 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
- bool HasExternal = false;
- GlobalVariable *TheFirstExternal = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
- Type *Ty = Globals[j]->getType()->getElementType();
+ Type *Ty = Globals[j]->getValueType();
MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
-
- if (Globals[j]->hasExternalLinkage() && !HasExternal) {
- HasExternal = true;
- TheFirstExternal = Globals[j];
- }
}
- // If merged variables doesn't have external linkage, we needn't to expose
- // the symbol after merging.
- GlobalValue::LinkageTypes Linkage = HasExternal
- ? GlobalValue::ExternalLinkage
- : GlobalValue::InternalLinkage;
-
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- // If merged variables have external linkage, we use symbol name of the
- // first variable merged as the suffix of global symbol name. This would
- // be able to avoid the link-time naming conflict for globalm symbols.
GlobalVariable *MergedGV = new GlobalVariable(
- M, MergedTy, isConst, Linkage, MergedInit,
- HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName()
- : "_MergedGlobals",
- nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+ M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit,
+ "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
- for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) {
+ for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, idx++)
+ ConstantInt::get(Int32Ty, idx),
};
Constant *GEP =
ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
- if (Linkage != GlobalValue::InternalLinkage) {
- // Generate a new alias...
- auto *PTy = cast<PointerType>(GEP->getType());
- GlobalAlias::create(PTy, Linkage, Name, GEP, &M);
+ // When the linkage is not internal we must emit an alias for the original
+ // variable name as it may be accessed from another object. On non-Mach-O
+ // we can also emit an alias for internal linkage as it's safe to do so.
+ // It's not safe on Mach-O as the alias (and thus the portion of the
+ // MergedGlobals variable) may be dead stripped at link time.
+ if (Linkage != GlobalValue::InternalLinkage ||
+ !TM->getTargetTriple().isOSBinFormatMachO()) {
+ GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M);
}
NumMerged++;
@@ -535,61 +520,57 @@ bool GlobalMerge::doInitialization(Module &M) {
setMustKeepGlobalVariables(M);
// Grab all non-const globals.
- for (Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
+ for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
continue;
- if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
- !I->hasInternalLinkage())
+ if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
+ !GV.hasInternalLinkage())
continue;
- PointerType *PT = dyn_cast<PointerType>(I->getType());
+ PointerType *PT = dyn_cast<PointerType>(GV.getType());
assert(PT && "Global variable is not a pointer!");
unsigned AddressSpace = PT->getAddressSpace();
// Ignore fancy-aligned globals for now.
- unsigned Alignment = DL.getPreferredAlignment(I);
- Type *Ty = I->getType()->getElementType();
+ unsigned Alignment = DL.getPreferredAlignment(&GV);
+ Type *Ty = GV.getValueType();
if (Alignment > DL.getABITypeAlignment(Ty))
continue;
// Ignore all 'special' globals.
- if (I->getName().startswith("llvm.") ||
- I->getName().startswith(".llvm."))
+ if (GV.getName().startswith("llvm.") ||
+ GV.getName().startswith(".llvm."))
continue;
// Ignore all "required" globals:
- if (isMustKeepGlobalVariable(I))
+ if (isMustKeepGlobalVariable(&GV))
continue;
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
- if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
- BSSGlobals[AddressSpace].push_back(I);
- else if (I->isConstant())
- ConstGlobals[AddressSpace].push_back(I);
+ if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
+ BSSGlobals[AddressSpace].push_back(&GV);
+ else if (GV.isConstant())
+ ConstGlobals[AddressSpace].push_back(&GV);
else
- Globals[AddressSpace].push_back(I);
+ Globals[AddressSpace].push_back(&GV);
}
}
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = Globals.begin(), E = Globals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
+ for (auto &P : Globals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
+ for (auto &P : BSSGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
if (EnableGlobalMergeOnConst)
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, true, I->first);
+ for (auto &P : ConstGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, true, P.first);
return Changed;
}
@@ -604,6 +585,9 @@ bool GlobalMerge::doFinalization(Module &M) {
}
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
- bool OnlyOptimizeForSize) {
- return new GlobalMerge(TM, Offset, OnlyOptimizeForSize);
+ bool OnlyOptimizeForSize,
+ bool MergeExternalByDefault) {
+ bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
+ return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
}
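With EnableGlobalMergeOnExternal now a cl::boolOrDefault, the command-line flag only overrides the target's choice when the user actually passes it; otherwise MergeExternalByDefault decides. A minimal sketch of that tri-state resolution pattern, with illustrative option and function names:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Tri-state flag: stays at cl::BOU_UNSET unless passed explicitly.
static cl::opt<cl::boolOrDefault>
    MergeExternalOpt("example-merge-external", cl::Hidden,
                     cl::desc("Illustrative tri-state flag"));

// Resolve the flag against a caller-supplied (e.g. target-chosen) default.
static bool resolveMergeExternal(bool Default) {
  if (MergeExternalOpt == cl::BOU_UNSET)
    return Default;                        // user said nothing: keep the default
  return MergeExternalOpt == cl::BOU_TRUE; // user forced the value
}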
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index ee0532b..c38c9d2 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
using namespace llvm;
@@ -190,10 +191,10 @@ namespace {
private:
bool ReverseBranchCondition(BBInfo &BBI);
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- const BranchProbability &Prediction) const;
+ BranchProbability Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- const BranchProbability &Prediction) const;
+ BranchProbability Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
@@ -218,7 +219,7 @@ namespace {
bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
unsigned Cycle, unsigned Extra,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
Prediction);
}
@@ -227,7 +228,7 @@ namespace {
unsigned TCycle, unsigned TExtra,
MachineBasicBlock &FBB,
unsigned FCycle, unsigned FExtra,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
return TCycle > 0 && FCycle > 0 &&
TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
Prediction);
@@ -462,11 +463,11 @@ bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
/// getNextBlock - Returns the next block in the function blocks ordering. If
/// it is the end, returns NULL.
static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
- MachineFunction::iterator I = BB;
+ MachineFunction::iterator I = BB->getIterator();
MachineFunction::iterator E = BB->getParent()->end();
if (++I == E)
return nullptr;
- return I;
+ return &*I;
}
/// ValidSimple - Returns true if the 'true' block (along with its
@@ -474,7 +475,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -501,7 +502,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -530,10 +531,10 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
- MachineFunction::iterator I = TrueBBI.BB;
+ MachineFunction::iterator I = TrueBBI.BB->getIterator();
if (++I == TrueBBI.BB->getParent()->end())
return false;
- TExit = I;
+ TExit = &*I;
}
return TExit && TExit == FalseBBI.BB;
}
@@ -948,10 +949,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
/// candidates.
void IfConverter::AnalyzeBlocks(MachineFunction &MF,
std::vector<IfcvtToken*> &Tokens) {
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
- AnalyzeBlock(BB, Tokens);
- }
+ for (auto &BB : MF)
+ AnalyzeBlock(&BB, Tokens);
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
@@ -961,14 +960,14 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF,
/// that all the intervening blocks are empty (given BB can fall through to its
/// next block).
static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator PI = BB;
+ MachineFunction::iterator PI = BB->getIterator();
MachineFunction::iterator I = std::next(PI);
- MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator TI = ToBB->getIterator();
MachineFunction::iterator E = BB->getParent()->end();
while (I != TI) {
// Check isSuccessor to avoid case where the next block is empty, but
// it's not a successor.
- if (I == E || !I->empty() || !PI->isSuccessor(I))
+ if (I == E || !I->empty() || !PI->isSuccessor(&*I))
return false;
PI = I++;
}
@@ -1114,7 +1113,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB);
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
} else {
RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
@@ -1153,28 +1152,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
return true;
}
-/// Scale down weights to fit into uint32_t. NewTrue is the new weight
-/// for successor TrueBB, and NewFalse is the new weight for successor
-/// FalseBB.
-static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse,
- MachineBasicBlock *MBB,
- const MachineBasicBlock *TrueBB,
- const MachineBasicBlock *FalseBB,
- const MachineBranchProbabilityInfo *MBPI) {
- uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI) {
- if (*SI == TrueBB)
- MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale));
- else if (*SI == FalseBB)
- MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale));
- else
- MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale);
- }
-}
-
/// IfConvertTriangle - If convert a triangle sub-CFG.
///
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
@@ -1231,16 +1208,14 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
DontKill.clear();
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
- uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
- uint32_t WeightScale = 0;
+ BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
if (HasEarlyExit) {
- // Get weights before modifying CvtBBI->BB and BBI.BB.
- CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
- CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB);
- BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB);
- BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
- SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
+ // Get probabilities before modifying CvtBBI->BB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
+ CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
}
if (CvtBBI->BB->pred_size() > 1) {
@@ -1251,7 +1226,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB);
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
@@ -1268,22 +1243,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
+
+ // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
+ // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
+ // Prob(BBI.BB, NextBBI->BB) +
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
+ // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
+ auto NewTrueBB = getNextBlock(BBI.BB);
+ auto NewNext = BBNext + BBCvt * CvtNext;
+ auto NewTrueBBIter =
+ std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
+ if (NewTrueBBIter != BBI.BB->succ_end())
+ BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
+
+ auto NewFalse = BBCvt * CvtFalse;
TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
- BBI.BB->addSuccessor(CvtBBI->FalseBB);
- // Update the edge weight for both CvtBBI->FalseBB and NextBBI.
- // New_Weight(BBI.BB, NextBBI->BB) =
- // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) +
- // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB)
- // New_Weight(BBI.BB, CvtBBI->FalseBB) =
- // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB)
-
- uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale;
- uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale;
- // We need to scale down all weights of BBI.BB to fit uint32_t.
- // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to
- // the next block.
- ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB),
- CvtBBI->FalseBB, MBPI);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
}
// Merge in the 'false' block if the 'false' block has no other
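Because BranchProbability now supports the addition and multiplication used above, the update can be written directly in terms of edge probabilities instead of scaled integer weights. A worked example with made-up numbers, only to show that the rewritten edges still sum to one:

#include "llvm/Support/BranchProbability.h"
using llvm::BranchProbability;

int main() {
  // Assumed probabilities before if-converting the triangle.
  BranchProbability BBCvt(3, 4), BBNext(1, 4);     // BBI.BB -> CvtBBI / NextBBI
  BranchProbability CvtNext(2, 3), CvtFalse(1, 3); // CvtBBI -> NextBBI / FalseBB

  // The same composition the patch performs once CvtBBI is predicated away.
  BranchProbability NewNext = BBNext + BBCvt * CvtNext; // 1/4 + 1/2 = 3/4
  BranchProbability NewFalse = BBCvt * CvtFalse;        // 3/4 * 1/3 = 1/4
  (void)NewNext;
  (void)NewFalse; // NewNext + NewFalse covers all of BBI.BB's outgoing mass
  return 0;
}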
@@ -1526,7 +1502,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
MergeBlocks(BBI, TailBBI);
TailBBI.IsDone = true;
} else {
- BBI.BB->addSuccessor(TailBB);
+ BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
InsertUncondBranch(BBI.BB, TailBB, TII);
BBI.HasFallThrough = false;
}
@@ -1536,7 +1512,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.
BBI.BB->removeSuccessor(BBI1->BB);
- BBI.BB->removeSuccessor(BBI2->BB);
+ BBI.BB->removeSuccessor(BBI2->BB, true);
RemoveExtraEdges(BBI);
// Update block info.
@@ -1686,25 +1662,94 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
+ // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+ // unknown probabilities into known ones.
+ // FIXME: This usage is too tricky and in the future we would like to
+ // eliminate all unknown probabilities in MBB.
+ ToBBI.BB->normalizeSuccProbs();
+
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+ // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
+ // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+ auto To2FromProb = BranchProbability::getZero();
+ if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
+ // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+ // edge probability being merged to other edges when this edge is removed
+ // later.
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
+ BranchProbability::getZero());
+ }
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
+ for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FromSuccs[i];
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
+
+ auto NewProb = BranchProbability::getZero();
+ if (AddEdges) {
+ // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+ // which is a portion of the edge probability from FromBBI.BB to Succ. The
+ // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+ // FromBBI.BB is a successor of ToBBI.BB; see the comment below for the exception).
+ NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+
+ // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromBBI.BB is the
+ // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromBBI.BB's out-edges when adding
+ // new successors.
+ if (!To2FromProb.isZero())
+ NewProb *= To2FromProb;
+ }
+
FromBBI.BB->removeSuccessor(Succ);
- if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
- ToBBI.BB->addSuccessor(Succ);
+
+ if (AddEdges) {
+ // If the edge from ToBBI.BB to Succ already exists, update the
+ // probability of this edge by adding NewProb to it. An example is shown
+ // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+ // don't have to set C as A's successor as it already is. We only need to
+ // update the edge probability on A->C. Note that B will not be
+ // immediately removed from A's successors. It is possible that B->D is
+ // not removed either if D is a fallthrough of B. Later the edge A->D
+ // (generated here) and B->D will be combined into one edge. To maintain
+ // correct edge probability of this combined edge, we need to set the edge
+ // probability of A->B to zero, which is already done above. The edge
+ // probability on A->D is calculated by scaling the original probability
+ // on A->B by the probability of B->D.
+ //
+ // Before ifcvt: After ifcvt (assume B->D is kept):
+ //
+ // A A
+ // /| /|\
+ // / B / B|
+ // | /| | ||
+ // |/ | | |/
+ // C D C D
+ //
+ if (ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
+ MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
+ else
+ ToBBI.BB->addSuccessor(Succ, NewProb);
+ }
}
// Now FromBBI always falls through to the next block!
if (NBB && !FromBBI.BB->isSuccessor(NBB))
FromBBI.BB->addSuccessor(NBB);
+ // Normalize the probabilities of ToBBI.BB's successors with all adjustment
+ // we've done above.
+ ToBBI.BB->normalizeSuccProbs();
+
ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 93e0487..39c1b9f 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -107,6 +108,98 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
+/// \brief Detect re-ordering hazards and dependencies.
+///
+/// This class keeps track of defs and uses, and can be queried if a given
+/// machine instruction can be re-ordered from after the machine instructions
+/// seen so far to before them.
+class HazardDetector {
+ DenseSet<unsigned> RegDefs;
+ DenseSet<unsigned> RegUses;
+ const TargetRegisterInfo &TRI;
+ bool hasSeenClobber;
+
+public:
+ explicit HazardDetector(const TargetRegisterInfo &TRI) :
+ TRI(TRI), hasSeenClobber(false) {}
+
+ /// \brief Make a note of \p MI for later queries to isSafeToHoist.
+ ///
+ /// May clobber this HazardDetector instance. \see isClobbered.
+ void rememberInstruction(MachineInstr *MI);
+
+ /// \brief Return true if it is safe to hoist \p MI from after all the
+ /// instructions seen so far (via rememberInstruction) to before it.
+ bool isSafeToHoist(MachineInstr *MI);
+
+ /// \brief Return true if this instance of HazardDetector has been clobbered
+ /// (i.e. has no more useful information).
+ ///
+ /// A HazardDetector is clobbered when it sees a construct it cannot
+ /// understand, and it would have to return a conservative answer for all
+ /// future queries. Having a separate clobbered state lets the client code
+ /// bail early, without making queries about all of the future instructions
+ /// (which would have returned the most conservative answer anyway).
+ ///
+ /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
+ /// is an error.
+ bool isClobbered() { return hasSeenClobber; }
+};
+}
+
+
+void HazardDetector::rememberInstruction(MachineInstr *MI) {
+ assert(!isClobbered() &&
+ "Don't add instructions to a clobbered hazard detector");
+
+ if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
+ hasSeenClobber = true;
+ return;
+ }
+
+ for (auto *MMO : MI->memoperands()) {
+ // Right now we don't want to worry about LLVM's memory model.
+ if (!MMO->isUnordered()) {
+ hasSeenClobber = true;
+ return;
+ }
+ }
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(MO.getReg());
+ else
+ RegUses.insert(MO.getReg());
+ }
+}
+
+bool HazardDetector::isSafeToHoist(MachineInstr *MI) {
+ assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
+
+ // Right now we don't want to worry about LLVM's memory model. This can be
+ // made more precise later.
+ for (auto *MMO : MI->memoperands())
+ if (!MMO->isUnordered())
+ return false;
+
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg()) {
+ for (unsigned Reg : RegDefs)
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-write or read-after-write
+
+ if (MO.isDef())
+ for (unsigned Reg : RegUses)
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-read
+ }
+ }
+
+ return true;
}
bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
@@ -132,10 +225,10 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
- MDNode *BranchMD =
- MBB.getBasicBlock()
- ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit")
- : nullptr;
+ MDNode *BranchMD = nullptr;
+ if (auto *BB = MBB.getBasicBlock())
+ BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
+
if (!BranchMD)
return false;
@@ -188,7 +281,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
// we want to end up with
//
- // Def = TrappingLoad (%RAX + <offset>), LblNull
+ // Def = FaultingLoad (%RAX + <offset>), LblNull
// jmp LblNotNull ;; explicit or fallthrough
//
// LblNotNull:
@@ -199,38 +292,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// LblNull:
// callq throw_NullPointerException
//
+ //
+ // To see why this is legal, consider the two possibilities:
+ //
+ // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the
+ // load instruction dereferences the null page, causing a segmentation
+ // fault.
+ //
+ // 2. %RAX is not null: in this case we know that the load cannot fault, as
+ // otherwise the load would've faulted in the original program too and the
+ // original program would've been undefined.
+ //
+ // This reasoning cannot be extended to justify hoisting through arbitrary
+ // control flow. For instance, in the example below (in pseudo-C)
+ //
+ // if (ptr == null) { throw_npe(); unreachable; }
+ // if (some_cond) { return 42; }
+ // v = ptr->field; // LD
+ // ...
+ //
+ // we cannot (without code duplication) use the load marked "LD" to null check
+ // ptr -- clause (2) above does not apply in this case. In the above program
+ // the safety of ptr->field can be dependent on some_cond; and, for instance,
+ // ptr could be some non-null invalid reference that never gets loaded from
+ // because some_cond is always true.
unsigned PointerReg = MBP.LHS.getReg();
- // As we scan NotNullSucc for a suitable load instruction, we keep track of
- // the registers defined and used by the instructions we scan past. This bit
- // of information lets us decide if it is legal to hoist the load instruction
- // we find (if we do find such an instruction) to before NotNullSucc.
- DenseSet<unsigned> RegDefs, RegUses;
-
- // Returns true if it is safe to reorder MI to before NotNullSucc.
- auto IsSafeToHoist = [&](MachineInstr *MI) {
- // Right now we don't want to worry about LLVM's memory model. This can be
- // made more precise later.
- for (auto *MMO : MI->memoperands())
- if (!MMO->isUnordered())
- return false;
-
- for (auto &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg()) {
- for (unsigned Reg : RegDefs)
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-write or read-after-write
-
- if (MO.isDef())
- for (unsigned Reg : RegUses)
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-read
- }
- }
-
- return true;
- };
+ HazardDetector HD(*TRI);
for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
++MII) {
@@ -238,37 +327,16 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
unsigned BaseReg, Offset;
if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI->getDesc().getNumDefs() == 1 &&
- IsSafeToHoist(MI)) {
+ Offset < PageSize && MI->getDesc().getNumDefs() <= 1 &&
+ HD.isSafeToHoist(MI)) {
NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc);
return true;
}
- // MI did not match our criteria for conversion to a trapping load. Check
- // if we can continue looking.
-
- if (MI->mayStore() || MI->hasUnmodeledSideEffects())
+ HD.rememberInstruction(MI);
+ if (HD.isClobbered())
return false;
-
- for (auto *MMO : MI->memoperands())
- // Right now we don't want to worry about LLVM's memory model.
- if (!MMO->isUnordered())
- return false;
-
- // It _may_ be okay to reorder a later load instruction across MI. Make a
- // note of its operands so that we can make the legality check if we find a
- // suitable load instruction:
-
- for (auto &MO : MI->operands()) {
- if (!MO.isReg() || !MO.getReg())
- continue;
-
- if (MO.isDef())
- RegDefs.insert(MO.getReg());
- else
- RegUses.insert(MO.getReg());
- }
}
return false;
@@ -281,14 +349,19 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
MachineBasicBlock *MBB,
MCSymbol *HandlerLabel) {
+ const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
+ // all targets.
+
DebugLoc DL;
unsigned NumDefs = LoadMI->getDesc().getNumDefs();
- assert(NumDefs == 1 && "other cases unhandled!");
- (void)NumDefs;
+ assert(NumDefs <= 1 && "other cases unhandled!");
- unsigned DefReg = LoadMI->defs().begin()->getReg();
- assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
- "expected exactly one def!");
+ unsigned DefReg = NoRegister;
+ if (NumDefs != 0) {
+ DefReg = LoadMI->defs().begin()->getReg();
+ assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ "expected exactly one def!");
+ }
auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
.addSym(HandlerLabel)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 9989f23..e310132 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -141,7 +141,7 @@ public:
InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AliasAnalysis>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
@@ -329,8 +329,8 @@ static raw_ostream &operator<<(raw_ostream &OS,
if (SVI.KillsSource)
OS << " kill";
OS << " deps[";
- for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
- OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ for (VNInfo *Dep : SVI.Deps)
+ OS << ' ' << Dep->id << '@' << Dep->def;
OS << " ]";
if (SVI.DefMI)
OS << " def: " << *SVI.DefMI;
@@ -383,9 +383,8 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
unsigned SpillDepth = ~0u;
- for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
- DepE = Deps->end(); DepI != DepE; ++DepI) {
- SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ for (VNInfo *Dep : *Deps) {
+ SibValueMap::iterator DepSVI = SibValues.find(Dep);
assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
SibValueInfo &DepSV = DepSVI->second;
if (!DepSV.SpillMBB)
@@ -566,12 +565,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Create entries for all the PHIs. Don't add them to the worklist, we
// are processing all of them in one go here.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+ for (VNInfo *PHI : PHIs)
+ SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
// Add every PHI as a dependent of all the non-PHIs.
- for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
- VNInfo *NonPHI = NonPHIs[i];
+ for (VNInfo *NonPHI : NonPHIs) {
// Known value? Try an insertion.
std::tie(SVI, Inserted) =
SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
@@ -654,8 +652,7 @@ void InlineSpiller::analyzeSiblingValues() {
return;
LiveInterval &OrigLI = LIS.getInterval(Original);
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
VE = LI.vni_end(); VI != VE; ++VI) {
@@ -831,9 +828,8 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
if (VNI->isPHIDef()) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
+ for (MachineBasicBlock *P : MBB->predecessors()) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
if (PVNI)
WorkList.push_back(std::make_pair(LI, PVNI));
}
@@ -920,8 +916,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
<< *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewVReg);
MO.setIsKill();
@@ -944,8 +940,7 @@ void InlineSpiller::reMaterializeAll() {
// Try to remat before all uses of snippets.
bool anyRemat = false;
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::reg_bundle_iterator
RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
@@ -963,8 +958,7 @@ void InlineSpiller::reMaterializeAll() {
return;
// Remove any values that were completely rematted.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
@@ -989,8 +983,7 @@ void InlineSpiller::reMaterializeAll() {
// Get rid of deleted and empty intervals.
unsigned ResultPos = 0;
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
if (!LIS.hasInterval(Reg))
continue;
@@ -1098,9 +1091,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned Idx = Ops[i].second;
- assert(MI == Ops[i].first && "Instruction conflict during operand folding");
+ for (const auto &OpPair : Ops) {
+ unsigned Idx = OpPair.second;
+ assert(MI == OpPair.first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
if (MO.isImplicit()) {
ImpReg = MO.getReg();
@@ -1139,7 +1132,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
continue;
MIBundleOperands::PhysRegInfo RI =
MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
- if (RI.Defines)
+ if (RI.FullyDefined)
continue;
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
@@ -1152,10 +1145,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
// Insert any new instructions other than FoldMI into the LIS maps.
assert(!MIS.empty() && "Unexpected empty span of instructions!");
- for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
- MII != End; ++MII)
- if (&*MII != FoldMI)
- LIS.InsertMachineInstrInMaps(&*MII);
+ for (MachineInstr &MI : MIS)
+ if (&MI != FoldMI)
+ LIS.InsertMachineInstrInMaps(&MI);
// TII.foldMemoryOperand may have left some implicit operands on the
// instruction. Strip them.
@@ -1301,11 +1293,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Rewrite instruction operands.
bool hasLiveDef = false;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
MO.setReg(NewVReg);
if (MO.isUse()) {
- if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
+ if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
MO.setIsKill();
} else {
if (!MO.isDead())
@@ -1335,14 +1327,14 @@ void InlineSpiller::spillAll() {
VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]),
+ for (unsigned Reg : RegsToSpill)
+ StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
StackInt->getValNumInfo(0));
DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
// Spill around uses of all RegsToSpill.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- spillAroundUses(RegsToSpill[i]);
+ for (unsigned Reg : RegsToSpill)
+ spillAroundUses(Reg);
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
@@ -1351,9 +1343,9 @@ void InlineSpiller::spillAll() {
}
// Finally delete the SnippetCopies.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (unsigned Reg : RegsToSpill) {
for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end();
+ RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
RI != E; ) {
MachineInstr *MI = &*(RI++);
assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
@@ -1364,8 +1356,8 @@ void InlineSpiller::spillAll() {
}
// Delete all spilled registers.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- Edit->eraseVirtReg(RegsToSpill[i]);
+ for (unsigned Reg : RegsToSpill)
+ Edit->eraseVirtReg(Reg);
}
void InlineSpiller::spill(LiveRangeEdit &edit) {
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
index fd5749b..f8cc247 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -144,7 +144,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
PrevPos = Start;
}
- MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ MachineFunction::const_iterator MFI =
+ MF->getBlockNumbered(MBBNum)->getIterator();
BlockInterference *BI = &Blocks[MBBNum];
ArrayRef<SlotIndex> RegMaskSlots;
ArrayRef<const uint32_t*> RegMaskBits;
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 53c8adc..724f1d6 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -52,7 +52,7 @@ using namespace llvm;
static cl::opt<bool> LowerInterleavedAccesses(
"lower-interleaved-accesses",
cl::desc("Enable lowering interleaved accesses to intrinsics"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static unsigned MaxFactor; // The maximum supported interleave factor.
@@ -271,7 +271,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
SmallVector<Instruction *, 32> DeadInsts;
bool Changed = false;
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I))
Changed |= lowerInterleavedLoad(LI, DeadInsts);
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 2c95e9e..2962f87 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -35,24 +35,24 @@ static void EnsureFunctionExists(Module &M, const char *Name,
M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
}
-static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
const char *FName,
const char *DName, const char *LDName) {
// Insert definitions for all the floating point types.
- switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ switch((int)Fn.arg_begin()->getType()->getTypeID()) {
case Type::FloatTyID:
- EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
Type::getFloatTy(M.getContext()));
break;
case Type::DoubleTyID:
- EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
Type::getDoubleTy(M.getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
- Fn->arg_begin()->getType());
+ EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
+ Fn.arg_begin()->getType());
break;
}
}
@@ -67,7 +67,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
Type *RetTy) {
// If we haven't already looked up this function, check to see if the
// program already contains a function with this name.
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
// Get or insert the definition now.
std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
@@ -75,7 +75,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
Constant* FCache = M->getOrInsertFunction(NewFn,
FunctionType::get(RetTy, ParamTys, false));
- IRBuilder<> Builder(CI->getParent(), CI);
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
@@ -94,20 +94,20 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
void IntrinsicLowering::AddPrototypes(Module &M) {
LLVMContext &Context = M.getContext();
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->isDeclaration() && !I->use_empty())
- switch (I->getIntrinsicID()) {
+ for (auto &F : M)
+ if (F.isDeclaration() && !F.use_empty())
+ switch (F.getIntrinsicID()) {
default: break;
case Intrinsic::setjmp:
- EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
Type::getInt32Ty(M.getContext()));
break;
case Intrinsic::longjmp:
- EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
Type::getVoidTy(M.getContext()));
break;
case Intrinsic::siglongjmp:
- EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
Type::getVoidTy(M.getContext()));
break;
case Intrinsic::memcpy:
@@ -132,31 +132,31 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
DL.getIntPtrType(Context), nullptr);
break;
case Intrinsic::sqrt:
- EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
break;
case Intrinsic::sin:
- EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
break;
case Intrinsic::cos:
- EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
break;
case Intrinsic::pow:
- EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
break;
case Intrinsic::log:
- EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
break;
case Intrinsic::log2:
- EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
break;
case Intrinsic::log10:
- EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
break;
case Intrinsic::exp:
- EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
break;
case Intrinsic::exp2:
- EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
break;
}
}
@@ -167,8 +167,8 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
-
- IRBuilder<> Builder(IP->getParent(), IP);
+
+ IRBuilder<> Builder(IP);
switch(BitSize) {
default: llvm_unreachable("Unhandled type size of value to byteswap!");
@@ -268,7 +268,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
};
- IRBuilder<> Builder(IP->getParent(), IP);
+ IRBuilder<> Builder(IP);
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
unsigned WordSize = (BitSize + 63) / 64;
@@ -301,7 +301,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
/// instruction IP.
static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
- IRBuilder<> Builder(IP->getParent(), IP);
+ IRBuilder<> Builder(IP);
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
for (unsigned i = 1; i < BitSize; i <<= 1) {
@@ -338,7 +338,7 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
}
void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
- IRBuilder<> Builder(CI->getParent(), CI);
+ IRBuilder<> Builder(CI);
LLVMContext &Context = CI->getContext();
const Function *Callee = CI->getCalledFunction();
@@ -424,6 +424,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
+ case Intrinsic::get_dynamic_area_offset:
+ errs() << "WARNING: this target does not support the custom llvm.get."
+ "dynamic.area.offset. It is being lowered to a constant 0\n";
+ // Just lower it to a constant 0 because for most targets
+ // @llvm.get.dynamic.area.offset is lowered to zero.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
+ break;
case Intrinsic::returnaddress:
case Intrinsic::frameaddress:
errs() << "WARNING: this target does not support the llvm."
@@ -589,7 +596,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
return false;
// Okay, we can do this xform, do so now.
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37299eb..1c27377 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -82,7 +82,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
}
TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(BasicTTIImpl(this, F));
});
}
@@ -125,9 +125,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PM.add(new MachineFunctionAnalysis(*TM, MFInitializer));
// Enable FastISel with -fast, but allow that to be overridden.
+ TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
if (EnableFastISelOption == cl::BOU_TRUE ||
(TM->getOptLevel() == CodeGenOpt::None &&
- EnableFastISelOption != cl::BOU_FALSE))
+ TM->getO0WantsFastISel()))
TM->setFastISel(true);
// Ask the target for an isel.
@@ -202,6 +203,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
break;
}
@@ -254,6 +256,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
new file mode 100644
index 0000000..98d30b9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -0,0 +1,405 @@
+//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass implements a data flow analysis that propagates debug location
+/// information by inserting additional DBG_VALUE instructions into the machine
+/// instruction stream. The pass internally builds debug location liveness
+/// ranges to determine the points where additional DBG_VALUEs need to be
+/// inserted.
+///
+/// This is a separate pass from DbgValueHistoryCalculator to facilitate
+/// testing and improve modularity.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <deque>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "live-debug-values"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+
+namespace {
+
+class LiveDebugValues : public MachineFunctionPass {
+
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ typedef std::pair<const DILocalVariable *, const DILocation *>
+ InlinedVariable;
+
+ /// A potentially inlined instance of a variable.
+ struct DebugVariable {
+ const DILocalVariable *Var;
+ const DILocation *InlinedAt;
+
+ DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt)
+ : Var(_var), InlinedAt(_inlinedAt) {}
+
+ bool operator==(const DebugVariable &DV) const {
+ return (Var == DV.Var) && (InlinedAt == DV.InlinedAt);
+ }
+ };
+
+ /// Member variables and functions for Range Extension across basic blocks.
+ struct VarLoc {
+ DebugVariable Var;
+ const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr.
+
+ VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {}
+
+ bool operator==(const VarLoc &V) const;
+ };
+
+ typedef std::list<VarLoc> VarLocList;
+ typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB;
+
+ bool OLChanged; // OutgoingLocs got changed for this bb.
+ bool MBBJoined; // The MBB was joined.
+
+ void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
+ void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
+ void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs);
+ void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+
+ void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+
+ bool ExtendRanges(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ LiveDebugValues();
+
+ /// Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Print to ostream with a message.
+ void printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+ raw_ostream &Out) const;
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char LiveDebugValues::ID = 0;
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+ false, false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// \brief If @MI is a DBG_VALUE with debug value described by a defined
+// register, returns the number of this register. Otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ assert(MI.getNumOperands() == 4);
+ // If location of variable is described using a register (directly or
+ // indirectly), this register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+// \brief This function takes two DBG_VALUE instructions and returns true
+// if their offsets are equal; otherwise returns false.
+static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) {
+ assert(MI1.isDebugValue());
+ assert(MI1.getNumOperands() == 4);
+
+ assert(MI2.isDebugValue());
+ assert(MI2.getNumOperands() == 4);
+
+ if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue())
+ return true;
+
+ // Check if both MIs are indirect and they are equal.
+ if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue())
+ return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm();
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+ raw_ostream &Out) const {
+ Out << "Printing " << msg << ":\n";
+ for (const auto &L : V) {
+ Out << "MBB: " << L.first->getName() << ":\n";
+ for (const auto &VLL : L.second) {
+ Out << " Var: " << VLL.Var.Var->getName();
+ Out << " MI: ";
+ (*VLL.MI).dump();
+ Out << "\n";
+ }
+ }
+ Out << "\n";
+}
+
+bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const {
+ return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) &&
+ (areOffsetsEqual(*MI, *V.MI));
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+void LiveDebugValues::transferDebugValue(MachineInstr &MI,
+ VarLocList &OpenRanges) {
+ if (!MI.isDebugValue())
+ return;
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(
+ std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](const VarLoc &V) { return (Var == V.Var); }),
+ OpenRanges.end());
+
+ // Add Var to OpenRanges from this DBG_VALUE.
+ // TODO: Currently handles DBG_VALUE which has only reg as location.
+ if (isDescribedByReg(MI)) {
+ VarLoc V(Var, &MI);
+ OpenRanges.push_back(std::move(V));
+ }
+}
+
+/// A definition of a register may mark the end of a range.
+void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
+ VarLocList &OpenRanges) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!(MO.isReg() && MO.isDef() && MO.getReg() &&
+ TRI->isPhysicalRegister(MO.getReg())))
+ continue;
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](const VarLoc &V) {
+ return (*RAI ==
+ isDescribedByReg(*V.MI));
+ }),
+ OpenRanges.end());
+ }
+}
+
+/// Terminate all open ranges at the end of the current basic block.
+void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
+ VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs) {
+ const MachineBasicBlock *CurMBB = MI.getParent();
+ if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
+ return;
+
+ if (OpenRanges.empty())
+ return;
+
+ if (OutLocs.find(CurMBB) == OutLocs.end()) {
+ // Create space for new Outgoing locs entries.
+ VarLocList VLL;
+ OutLocs.insert(std::make_pair(CurMBB, std::move(VLL)));
+ }
+ auto OL = OutLocs.find(CurMBB);
+ assert(OL != OutLocs.end());
+ VarLocList &VLL = OL->second;
+
+ for (auto OR : OpenRanges) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ assert(OR.MI->isDebugValue());
+ DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump(););
+ if (std::find_if(VLL.begin(), VLL.end(),
+ [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) {
+ VLL.push_back(std::move(OR));
+ OLChanged = true;
+ }
+ }
+ OpenRanges.clear();
+}
+
+/// This routine creates OpenRanges and OutLocs.
+void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs) {
+ transferDebugValue(MI, OpenRanges);
+ transferRegisterDef(MI, OpenRanges);
+ transferTerminatorInst(MI, OpenRanges, OutLocs);
+}
+
+/// This routine joins the analysis results of all incoming edges in @MBB by
+/// inserting a new DBG_VALUE instruction at the start of @MBB, provided the same
+/// source variable in all the predecessors of @MBB resides in the same location.
+void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
+ VarLocInMBB &InLocs) {
+ DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+
+ MBBJoined = false;
+
+ VarLocList InLocsT; // Temporary incoming locations.
+
+ // For all predecessors of this MBB, find the set of VarLocs that can be
+ // joined.
+ for (auto p : MBB.predecessors()) {
+ auto OL = OutLocs.find(p);
+ // Join is null in case of empty OutLocs from any of the predecessors.
+ if (OL == OutLocs.end())
+ return;
+
+ // Just copy over the Out locs to incoming locs for the first predecessor.
+ if (p == *MBB.pred_begin()) {
+ InLocsT = OL->second;
+ continue;
+ }
+
+ // Join with this predecessor.
+ VarLocList &VLL = OL->second;
+ InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(),
+ [&](VarLoc &ILT) {
+ return (std::find_if(VLL.begin(), VLL.end(),
+ [&](const VarLoc &V) {
+ return (ILT == V);
+ }) == VLL.end());
+ }),
+ InLocsT.end());
+ }
+
+ if (InLocsT.empty())
+ return;
+
+ if (InLocs.find(&MBB) == InLocs.end()) {
+ // Create space for new Incoming locs entries.
+ VarLocList VLL;
+ InLocs.insert(std::make_pair(&MBB, std::move(VLL)));
+ }
+ auto IL = InLocs.find(&MBB);
+ assert(IL != InLocs.end());
+ VarLocList &ILL = IL->second;
+
+ // Insert DBG_VALUE instructions, if not already inserted.
+ for (auto ILT : InLocsT) {
+ if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) {
+ return (ILT == I);
+ }) == ILL.end()) {
+ // This VarLoc is not found in InLocs, i.e. it is not yet inserted. So, a
+ // new range is started for the var from the mbb's beginning by inserting
+ // a new DBG_VALUE. transfer() will end this range as appropriate.
+ const MachineInstr *DMI = ILT.MI;
+ MachineInstr *MI =
+ BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
+ DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ if (DMI->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ ++NumInserted;
+ MBBJoined = true; // rerun transfer().
+
+ VarLoc V(ILT.Var, MI);
+ ILL.push_back(std::move(V));
+ }
+ }
+}
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "\nDebug Range Extension\n");
+
+ bool Changed = false;
+ OLChanged = MBBJoined = false;
+
+ VarLocList OpenRanges; // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+
+ std::deque<MachineBasicBlock *> BBWorklist;
+
+ // Initialize every mbb with OutLocs.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs()));
+
+ // Construct a worklist of MBBs.
+ for (auto &MBB : MF)
+ BBWorklist.push_back(&MBB);
+
+ // Perform join() and transfer() using the worklist until the ranges converge.
+ // Ranges have converged when the worklist is empty.
+ while (!BBWorklist.empty()) {
+ MachineBasicBlock *MBB = BBWorklist.front();
+ BBWorklist.pop_front();
+
+ join(*MBB, OutLocs, InLocs);
+
+ if (MBBJoined) {
+ Changed = true;
+ for (auto &MI : *MBB)
+ transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto s : MBB->successors())
+ if (std::find(BBWorklist.begin(), BBWorklist.end(), s) ==
+ BBWorklist.end()) // add if not already present.
+ BBWorklist.push_back(s);
+ }
+ }
+ }
+ DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs()));
+ return Changed;
+}
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ Changed |= ExtendRanges(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 1571551..6dac7db 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -91,9 +91,7 @@ public:
bool dominates(MachineBasicBlock *MBB) {
if (LBlocks.empty())
LS.getMachineBasicBlocks(DL, LBlocks);
- if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
- return true;
- return false;
+ return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
}
};
} // end anonymous namespace
@@ -512,7 +510,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
bool Changed = false;
for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE;) {
if (!MBBI->isDebugValue()) {
@@ -536,65 +534,49 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
return Changed;
}
-void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
- LiveRange *LR, const VNInfo *VNI,
- SmallVectorImpl<SlotIndex> *Kills,
+/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+/// data-flow analysis to propagate them beyond basic block boundaries.
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+ const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS) {
- SmallVector<SlotIndex, 16> Todo;
- Todo.push_back(Idx);
- do {
- SlotIndex Start = Todo.pop_back_val();
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
- SlotIndex Stop = LIS.getMBBEndIdx(MBB);
- LocMap::iterator I = locInts.find(Start);
-
- // Limit to VNI's live range.
- bool ToEnd = true;
- if (LR && VNI) {
- LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
- if (!Segment || Segment->valno != VNI) {
- if (Kills)
- Kills->push_back(Start);
- continue;
- }
- if (Segment->end < Stop)
- Stop = Segment->end, ToEnd = false;
- }
-
- // There could already be a short def at Start.
- if (I.valid() && I.start() <= Start) {
- // Stop when meeting a different location or an already extended interval.
- Start = Start.getNextSlot();
- if (I.value() != LocNo || I.stop() != Start)
- continue;
- // This is a one-slot placeholder. Just skip it.
- ++I;
+ SlotIndex Start = Idx;
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LR && VNI) {
+ LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
+ if (!Segment || Segment->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ return;
}
+ if (Segment->end < Stop)
+ Stop = Segment->end, ToEnd = false;
+ }
- // Limited by the next def.
- if (I.valid() && I.start() < Stop)
- Stop = I.start(), ToEnd = false;
- // Limited by VNI's live range.
- else if (!ToEnd && Kills)
- Kills->push_back(Stop);
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ return;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
- if (Start >= Stop)
- continue;
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+ if (Start < Stop)
I.insert(Start, Stop, LocNo);
-
- // If we extended to the MBB end, propagate down the dominator tree.
- if (!ToEnd)
- continue;
- const std::vector<MachineDomTreeNode*> &Children =
- MDT.getNode(MBB)->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Children[i]->getBlock();
- if (UVS.dominates(MBB))
- Todo.push_back(LIS.getMBBStartIdx(MBB));
- }
- } while (!Todo.empty());
}
void
@@ -763,7 +745,7 @@ static void removeDebugValues(MachineFunction &mf) {
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
- if (!FunctionDIs.count(mf.getFunction())) {
+ if (!mf.getFunction()->getSubprogram()) {
removeDebugValues(mf);
return false;
}
@@ -1004,11 +986,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
SlotIndex Stop = I.stop();
unsigned LocNo = I.value();
DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
- MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
- SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while(Stop > MBBEnd) {
@@ -1016,9 +998,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
Start = MBBEnd;
if (++MBB == MFEnd)
break;
- MBBEnd = LIS.getMBBEndIdx(MBB);
+ MBBEnd = LIS.getMBBEndIdx(&*MBB);
DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
}
DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1047,7 +1029,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
}
bool LiveDebugVariables::doInitialization(Module &M) {
- FunctionDIs = makeSubprogramMap(M);
return Pass::doInitialization(M);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index 694aa17..3d36f4d 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -33,7 +33,6 @@ class VirtRegMap;
class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
void *pImpl;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index d75e441..efad36f 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
@@ -865,7 +864,7 @@ void LiveInterval::constructMainRangeFromSubranges(
// - If any of the subranges is live at a point the main liverange has to be
// live too; conversely, if no subrange is live the main range mustn't be
// live either.
- // We do this by scannig through all the subranges simultaneously creating new
+ // We do this by scanning through all the subranges simultaneously creating new
// segments in the main range as segments start/ends come up in the subranges.
assert(hasSubRanges() && "expected subranges to be present");
assert(segments.empty() && valnos.empty() && "expected empty main range");
@@ -889,7 +888,7 @@ void LiveInterval::constructMainRangeFromSubranges(
Segment CurrentSegment;
bool ConstructingSegment = false;
bool NeedVNIFixup = false;
- unsigned ActiveMask = 0;
+ LaneBitmask ActiveMask = 0;
SlotIndex Pos = First;
while (true) {
SlotIndex NextPos = Last;
@@ -899,7 +898,7 @@ void LiveInterval::constructMainRangeFromSubranges(
END_SEGMENT,
} Event = NOTHING;
// Which subregister lanes are affected by the current event.
- unsigned EventMask = 0;
+ LaneBitmask EventMask = 0;
// Whether a BEGIN_SEGMENT is also a valno definition point.
bool IsDef = false;
// Find the next begin or end of a subrange segment. Combine masks if we
@@ -1066,7 +1065,7 @@ void LiveInterval::print(raw_ostream &OS) const {
super::print(OS);
// Print subranges
for (const SubRange &SR : subranges()) {
- OS << format(" L%04X ", SR.LaneMask) << SR;
+ OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR;
}
}
@@ -1101,8 +1100,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
super::verify();
// Make sure SubRanges are fine and LaneMasks are disjunct.
- unsigned Mask = 0;
- unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
for (const SubRange &SR : subranges()) {
// Subrange lanemask should be disjunct to any previous subrange masks.
assert((Mask & SR.LaneMask) == 0);
@@ -1110,6 +1109,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// The combined subrange masks must be contained in the maximum lane mask for the vreg.
assert((Mask & ~MaxMask) == 0);
+ // empty subranges must be removed.
+ assert(!SR.empty());
SR.verify();
// Main liverange should cover subrange.
@@ -1370,11 +1371,42 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
return EqClass.getNumClasses();
}
-void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
- MachineRegisterInfo &MRI) {
- assert(LIV[0] && "LIV[0] must be set");
- LiveInterval &LI = *LIV[0];
+template<typename LiveRangeT, typename EqClassesT>
+static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
+ EqClassesT VNIClasses) {
+ // Move segments to new intervals.
+ LiveRange::iterator J = LR.begin(), E = LR.end();
+ while (J != E && VNIClasses[J->valno->id] == 0)
+ ++J;
+ for (LiveRange::iterator I = J; I != E; ++I) {
+ if (unsigned eq = VNIClasses[I->valno->id]) {
+ assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ SplitLRs[eq-1]->segments.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LR.segments.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LR.getNumValNums();
+ while (j != e && VNIClasses[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (unsigned eq = VNIClasses[i]) {
+ VNI->id = SplitLRs[eq-1]->getNumValNums();
+ SplitLRs[eq-1]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LR.valnos[j++] = VNI;
+ }
+ }
+ LR.valnos.resize(j);
+}
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
// Rewrite instructions.
for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
RE = MRI.reg_end(); RI != RE;) {
@@ -1396,38 +1428,41 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
// NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
continue;
- MO.setReg(LIV[getEqClass(VNI)]->reg);
- }
-
- // Move runs to new intervals.
- LiveInterval::iterator J = LI.begin(), E = LI.end();
- while (J != E && EqClass[J->valno->id] == 0)
- ++J;
- for (LiveInterval::iterator I = J; I != E; ++I) {
- if (unsigned eq = EqClass[I->valno->id]) {
- assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
- "New intervals should be empty");
- LIV[eq]->segments.push_back(*I);
- } else
- *J++ = *I;
+ if (unsigned EqClass = getEqClass(VNI))
+ MO.setReg(LIV[EqClass-1]->reg);
}
- // TODO: do not cheat anymore by simply cleaning all subranges
- LI.clearSubRanges();
- LI.segments.erase(J, E);
- // Transfer VNInfos to their new owners and renumber them.
- unsigned j = 0, e = LI.getNumValNums();
- while (j != e && EqClass[j] == 0)
- ++j;
- for (unsigned i = j; i != e; ++i) {
- VNInfo *VNI = LI.getValNumInfo(i);
- if (unsigned eq = EqClass[i]) {
- VNI->id = LIV[eq]->getNumValNums();
- LIV[eq]->valnos.push_back(VNI);
- } else {
- VNI->id = j;
- LI.valnos[j++] = VNI;
+ // Distribute subregister liveranges.
+ if (LI.hasSubRanges()) {
+ unsigned NumComponents = EqClass.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ // Create new subranges in the split intervals and construct a mapping
+ // for the VNInfos in the subrange.
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumComponents-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
+ assert(MainRangeVNI != nullptr
+ && "SubRange def must have corresponding main range def");
+ unsigned ComponentNum = getEqClass(MainRangeVNI);
+ VNIMapping.push_back(ComponentNum);
+ if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
+ SubRanges[ComponentNum-1]
+ = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ }
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
}
+ LI.removeEmptySubRanges();
}
- LI.valnos.resize(j);
+
+ // Distribute main liverange.
+ DistributeRange(LI, LIV, EqClass);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index c00b010..9451d92 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -48,7 +47,7 @@ char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
@@ -76,8 +75,8 @@ cl::opt<bool> UseSegmentSetForPhysRegs(
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
// LiveVariables isn't really required by this analysis, it is only required
// here to make sure it is live during TwoAddressInstructionPass and
// PHIElimination. This is temporary.
@@ -124,7 +123,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -198,9 +197,16 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
+ bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg);
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
- computeDeadValues(LI, nullptr);
+ LRCalc->calculate(LI, ShouldTrackSubRegLiveness);
+ bool SeparatedComponents = computeDeadValues(LI, nullptr);
+ if (SeparatedComponents) {
+ assert(ShouldTrackSubRegLiveness
+ && "Separated components should only occur for unused subreg defs");
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ splitSeparateComponents(LI, SplitLIs);
+ }
}
void LiveIntervals::computeVirtRegs() {
@@ -216,19 +222,31 @@ void LiveIntervals::computeRegMasks() {
RegMaskBlocks.resize(MF->getNumBlockIDs());
// Find all instructions with regmask operands.
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ for (MachineBasicBlock &MBB : *MF) {
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
RMB.first = RegMaskSlots.size();
- for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
- MI != ME; ++MI)
- for (const MachineOperand &MO : MI->operands()) {
+
+ // Some block starts, such as EH funclets, create masks.
+ if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
+ for (MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
- RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
- RegMaskBits.push_back(MO.getRegMask());
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
}
+ }
+
+ // Some block ends, such as funclet returns, create masks.
+ if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
// Compute the number of register mask instructions in this block.
RMB.second = RegMaskSlots.size() - RMB.first;
}
@@ -296,18 +314,17 @@ void LiveIntervals::computeLiveInRegUnits() {
// Check all basic blocks for live-ins.
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
- const MachineBasicBlock *MBB = MFI;
+ const MachineBasicBlock *MBB = &*MFI;
// We only care about ABI blocks: Entry + landing pads.
- if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
continue;
// Create phi-defs at Begin for all live-in registers.
SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
- for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
- LIE = MBB->livein_end(); LII != LIE; ++LII) {
- for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ for (const auto &LI : MBB->liveins()) {
+ for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
@@ -396,9 +413,6 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
}
}
-/// shrinkToUses - After removing some uses of a register, shrink its live
-/// range to just the remaining uses. This method does not compute reaching
-/// defs for new uses, and it doesn't remove dead defs.
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
DEBUG(dbgs() << "Shrink: " << *li << '\n');
@@ -406,9 +420,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
&& "Can only shrink virtual registers");
// Shrink subregister live ranges.
+ bool NeedsCleanup = false;
for (LiveInterval::SubRange &S : li->subranges()) {
shrinkToUses(S, li->reg);
+ if (S.empty())
+ NeedsCleanup = true;
}
+ if (NeedsCleanup)
+ li->removeEmptySubRanges();
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
@@ -456,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
- bool PHIRemoved = false;
+ bool MayHaveSplitComponents = false;
for (auto VNI : LI.valnos) {
if (VNI->isUnused())
continue;
@@ -466,10 +485,13 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
+ bool DeadBeforeDef = false;
+ unsigned VReg = LI.reg;
+ if (MRI->shouldTrackSubRegLiveness(VReg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
- MI->addRegisterDefReadUndef(LI.reg);
+ MI->setRegisterDefReadUndef(VReg);
+ DeadBeforeDef = true;
}
}
@@ -480,19 +502,27 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
VNI->markUnused();
LI.removeSegment(I);
DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
- PHIRemoved = true;
+ MayHaveSplitComponents = true;
} else {
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(LI.reg, TRI);
+ MI->addRegisterDead(VReg, TRI);
+
+ // If we have a dead def that is completely separate from the rest of
+ // the liverange then we rewrite it to use a different VReg to not violate
+ // the rule that the liveness of a virtual register forms a connected
+ // component. This should only happen if subregister liveness is tracked.
+ if (DeadBeforeDef)
+ MayHaveSplitComponents = true;
+
if (dead && MI->allDefsAreDead()) {
DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
}
}
}
- return PHIRemoved;
+ return MayHaveSplitComponents;
}
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
@@ -512,8 +542,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
// Maybe the operand is for a subregister we don't care about.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((SubRegMask & SR.LaneMask) == 0)
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((LaneMask & SR.LaneMask) == 0)
continue;
}
// We only need to visit each instruction once.
@@ -712,7 +742,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0
// are actually never written by %vreg2. After assignment the <kill>
// flag at the read instruction is invalid.
- unsigned DefinedLanesMask;
+ LaneBitmask DefinedLanesMask;
if (!SRs.empty()) {
// Compute a mask of lanes that are defined.
DefinedLanesMask = 0;
@@ -736,7 +766,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
continue;
if (MO.isUse()) {
// Reading any undefined lanes?
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
if ((UseMask & ~DefinedLanesMask) != 0)
goto CancelKill;
} else if (MO.getSubReg() == 0) {
@@ -944,7 +974,7 @@ public:
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
- unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & LaneMask) == 0)
continue;
@@ -968,7 +998,7 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
+ void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
if (!Updated.insert(&LR).second)
return;
DEBUG({
@@ -976,7 +1006,7 @@ private:
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << PrintReg(Reg);
if (LaneMask != 0)
- dbgs() << format(" L%04X", LaneMask);
+ dbgs() << " L" << PrintLaneMask(LaneMask);
} else {
dbgs() << PrintRegUnit(Reg, &TRI);
}
@@ -1098,7 +1128,7 @@ private:
/// Hoist kill to NewIdx, then scan for last kill between NewIdx and
/// OldIdx.
///
- void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
+ void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
// First look for a kill at OldIdx.
LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
LiveRange::iterator E = LR.end();
@@ -1175,7 +1205,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) {
+ SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
SlotIndex LastUse = NewIdx;
@@ -1255,7 +1285,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
const MachineBasicBlock::iterator End,
const SlotIndex endIdx,
LiveRange &LR, const unsigned Reg,
- const unsigned LaneMask) {
+ LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(endIdx);
SlotIndex lastUseIdx;
if (LII != LR.end() && LII->start < endIdx)
@@ -1282,7 +1312,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
continue;
unsigned SubReg = MO.getSubReg();
- unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
if ((Mask & LaneMask) == 0)
continue;
@@ -1412,3 +1442,20 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
}
LI.removeEmptySubRanges();
}
+
+void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
+ SmallVectorImpl<LiveInterval*> &SplitLIs) {
+ ConnectedVNInfoEqClasses ConEQ(*this);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp <= 1)
+ return;
+ DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
+ unsigned Reg = LI.reg;
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ for (unsigned I = 1; I < NumComp; ++I) {
+ unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = createEmptyInterval(NewVReg);
+ SplitLIs.push_back(&NewLI);
+ }
+ ConEQ.Distribute(LI, SplitLIs.data(), *MRI);
+}
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index cbd98e3..efbbcbe 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -68,7 +68,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// Simulates liveness when stepping forward over an instruction(bundle): Remove
/// killed-uses, add defs. This is the not recommended way, because it depends
-/// on accurate kill flags. If possible use stepBackwards() instead of this
+/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
@@ -128,8 +128,8 @@ void LivePhysRegs::dump() const {
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
- for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end()))
- LiveRegs.addReg(Reg);
+ for (const auto &LI : MBB.liveins())
+ LiveRegs.addReg(LI.PhysReg);
}
/// Add pristine registers to the given \p LiveRegs. This function removes
@@ -147,11 +147,19 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB,
- bool AddPristines) {
- if (AddPristines) {
+ bool AddPristinesAndCSRs) {
+ if (AddPristinesAndCSRs) {
const MachineFunction &MF = *MBB->getParent();
addPristines(*this, MF, *TRI);
+ if (!MBB->isReturnBlock()) {
+ // The return block has no successors whose live-ins we could merge
+ // below. So instead we add the callee saved registers manually.
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ }
}
+
+ // To get the live-outs we simply merge the live-ins of all successors.
for (const MachineBasicBlock *Succ : MBB->successors())
::addLiveIns(*this, *Succ);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index bb2877a..c408615 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -64,23 +64,23 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
unsigned SubReg = MO.getSubReg();
if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
- unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
- : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
// If this is the first time we see a subregister def, initialize
// subranges by creating a copy of the main range.
if (!LI.hasSubRanges() && !LI.empty()) {
- unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
for (LiveInterval::SubRange &S : LI.subranges()) {
// A Mask for subregs common to the existing subrange and current def.
- unsigned Common = S.LaneMask & Mask;
+ LaneBitmask Common = S.LaneMask & Mask;
if (Common == 0)
continue;
// A Mask for subregs covered by the subrange but not the current def.
- unsigned LRest = S.LaneMask & ~Mask;
+ LaneBitmask LRest = S.LaneMask & ~Mask;
LiveInterval::SubRange *CommonRange;
if (LRest != 0) {
// Split current subrange into Common and LRest ranges.
@@ -138,7 +138,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
}
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
+ LaneBitmask Mask) {
// Visit all operands that read Reg. This may include partial defs.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
@@ -157,7 +158,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
continue;
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
// Ignore uses not covering the current subrange.
if ((SubRegMask & Mask) == 0)
continue;
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 34d9953..ff38c68 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -129,7 +129,7 @@ class LiveRangeCalc {
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask);
+ void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask);
/// Reset Map and Seen fields.
void resetLiveOutMap();
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 08bbe0c..5ce364a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -226,7 +226,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
return true;
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
unsigned SubReg = MO.getSubReg();
- unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (const LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
return true;
@@ -349,8 +349,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
+ unsigned VReg = LI->reg;
if (TheDelegate)
- TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
+ TheDelegate->LRE_WillShrinkVirtReg(VReg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
@@ -360,7 +361,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// them results in incorrect code.
bool BeingSpilled = false;
for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
- if (LI->reg == RegsBeingSpilled[i]) {
+ if (VReg == RegsBeingSpilled[i]) {
BeingSpilled = true;
break;
}
@@ -370,29 +371,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// LI may have been separated, create new intervals.
LI->RenumberValues();
- ConnectedVNInfoEqClasses ConEQ(LIS);
- unsigned NumComp = ConEQ.Classify(LI);
- if (NumComp <= 1)
- continue;
- ++NumFracRanges;
- bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
- DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
- SmallVector<LiveInterval*, 8> Dups(1, LI);
- for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createEmptyIntervalFrom(LI->reg));
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(*LI, SplitLIs);
+ if (!SplitLIs.empty())
+ ++NumFracRanges;
+
+ unsigned Original = VRM ? VRM->getOriginal(VReg) : 0;
+ for (const LiveInterval *SplitLI : SplitLIs) {
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
- if (IsOriginal)
- VRM->setIsSplitFromReg(Dups.back()->reg, 0);
+ if (Original != VReg && Original != 0)
+ VRM->setIsSplitFromReg(SplitLI->reg, Original);
if (TheDelegate)
- TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg);
}
- ConEQ.Distribute(&Dups[0], MRI);
- DEBUG({
- for (unsigned i = 0; i != NumComp; ++i)
- dbgs() << '\t' << *Dups[i] << '\n';
- });
}
}
@@ -411,7 +404,7 @@ void
LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI) {
- VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg))
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 9ea031d..7ee87c1 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -15,12 +15,11 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -49,7 +48,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
@@ -78,7 +76,7 @@ bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
- unsigned Mask = (*Units).second;
+ LaneBitmask Mask = (*Units).second;
for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
if (S.LaneMask & Mask) {
if (Func(Unit, S))
@@ -101,7 +99,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index b355393..06b86d8 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -522,11 +522,15 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
continue;
unsigned MOReg = MO.getReg();
if (MO.isUse()) {
- MO.setIsKill(false);
+ if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ MRI->isReserved(MOReg)))
+ MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
} else /*MO.isDef()*/ {
- MO.setIsDead(false);
+ if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ MRI->isReserved(MOReg)))
+ MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
}
@@ -559,11 +563,10 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
- for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
- EE = MBB->livein_end(); II != EE; ++II) {
- assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ for (const auto &LI : MBB->liveins()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
"Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, nullptr, Defs);
+ HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
}
// Loop over all of the instructions, processing them.
@@ -599,14 +602,12 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->isLandingPad())
+ if (SuccMBB->isEHPad())
continue;
- for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
- LE = SuccMBB->livein_end(); LI != LE; ++LI) {
- unsigned LReg = *LI;
- if (!TRI->isInAllocatableClass(LReg))
+ for (const auto &LI : SuccMBB->liveins()) {
+ if (!TRI->isInAllocatableClass(LI.PhysReg))
// Ignore other live-ins, e.g. those that are live into landing pads.
- LiveOuts.insert(LReg);
+ LiveOuts.insert(LI.PhysReg);
}
}
@@ -640,7 +641,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// function. This guarantees that we will see the definition of a virtual
// register before its uses due to dominance properties of SSA (except for PHI
// nodes, which are treated as a special case).
- MachineBasicBlock *Entry = MF->begin();
+ MachineBasicBlock *Entry = &MF->front();
SmallPtrSet<MachineBasicBlock*,16> Visited;
for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 8378429..eb60005 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -325,7 +325,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset
array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
- MachineBasicBlock *Entry = Fn.begin();
+ MachineBasicBlock *Entry = &Fn.front();
unsigned BaseReg = 0;
int64_t BaseOffset = 0;
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 482c33a..28f9d4e 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -54,15 +55,132 @@ public:
} // end anonymous namespace
+MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
+ this->Kind = Kind;
+ this->Range = Range;
+ return *this;
+}
+
+MIToken &MIToken::setStringValue(StringRef StrVal) {
+ StringValue = StrVal;
+ return *this;
+}
+
+MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
+ StringValueStorage = std::move(StrVal);
+ StringValue = StringValueStorage;
+ return *this;
+}
+
+MIToken &MIToken::setIntegerValue(APSInt IntVal) {
+ this->IntVal = std::move(IntVal);
+ return *this;
+}
+
/// Skip the leading whitespace characters and return the updated cursor.
static Cursor skipWhitespace(Cursor C) {
- while (isspace(C.peek()))
+ while (isblank(C.peek()))
+ C.advance();
+ return C;
+}
+
+static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; }
+
+/// Skip a line comment and return the updated cursor.
+static Cursor skipComment(Cursor C) {
+ if (C.peek() != ';')
+ return C;
+ while (!isNewlineChar(C.peek()) && !C.isEOF())
C.advance();
return C;
}
+/// Return true if the given character satisfies the following regular
+/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
- return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.';
+ return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' ||
+ C == '$';
+}
+
+/// Unescapes the given string value.
+///
+/// Expects the string value to be quoted.
+static std::string unescapeQuotedString(StringRef Value) {
+ assert(Value.front() == '"' && Value.back() == '"');
+ Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+ std::string Str;
+ Str.reserve(C.remaining().size());
+ while (!C.isEOF()) {
+ char Char = C.peek();
+ if (Char == '\\') {
+ if (C.peek(1) == '\\') {
+ // Two '\' become one
+ Str += '\\';
+ C.advance(2);
+ continue;
+ }
+ if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
+ Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
+ C.advance(3);
+ continue;
+ }
+ }
+ Str += Char;
+ C.advance();
+ }
+ return Str;
+}
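+// Example of the unescaping above (hypothetical input): the quoted value
+// "a\\b\3Cc" becomes the string  a\b<c , since "\\" collapses to a single
+// backslash and "\3C" is the two-hex-digit escape for '<'.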
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+static Cursor lexStringConstant(
+ Cursor C,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ assert(C.peek() == '"');
+ for (C.advance(); C.peek() != '"'; C.advance()) {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '\"'");
+ return None;
+ }
+ }
+ C.advance();
+ return C;
+}
+
+static Cursor lexName(
+ Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ auto Range = C;
+ C.advance(PrefixLength);
+ if (C.peek() == '"') {
+ if (Cursor R = lexStringConstant(C, ErrorCallback)) {
+ StringRef String = Range.upto(R);
+ Token.reset(Type, String)
+ .setOwnedStringValue(
+ unescapeQuotedString(String.drop_front(PrefixLength)));
+ return R;
+ }
+ Token.reset(MIToken::Error, Range.remaining());
+ return Range;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token.reset(Type, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(PrefixLength));
+ return C;
+}
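+// Example of the name lexing above (hypothetical input): @"some\20name" is
+// lexed by dropping the one-character '@' prefix, lexing the quoted string
+// constant, and storing its unescaped form ("some name") as the owned value.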
+
+static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) {
+ if (C.peek() != 'i' || !isdigit(C.peek(1)))
+ return None;
+ auto Range = C;
+ C.advance(); // Skip 'i'
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::IntegerType, Range.upto(C));
+ return C;
}
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
@@ -70,32 +188,70 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("_", MIToken::underscore)
.Case("implicit", MIToken::kw_implicit)
.Case("implicit-def", MIToken::kw_implicit_define)
+ .Case("def", MIToken::kw_def)
.Case("dead", MIToken::kw_dead)
.Case("killed", MIToken::kw_killed)
.Case("undef", MIToken::kw_undef)
+ .Case("internal", MIToken::kw_internal)
+ .Case("early-clobber", MIToken::kw_early_clobber)
+ .Case("debug-use", MIToken::kw_debug_use)
+ .Case("tied-def", MIToken::kw_tied_def)
+ .Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("debug-location", MIToken::kw_debug_location)
+ .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
+ .Case(".cfi_offset", MIToken::kw_cfi_offset)
+ .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("target-index", MIToken::kw_target_index)
+ .Case("half", MIToken::kw_half)
+ .Case("float", MIToken::kw_float)
+ .Case("double", MIToken::kw_double)
+ .Case("x86_fp80", MIToken::kw_x86_fp80)
+ .Case("fp128", MIToken::kw_fp128)
+ .Case("ppc_fp128", MIToken::kw_ppc_fp128)
+ .Case("target-flags", MIToken::kw_target_flags)
+ .Case("volatile", MIToken::kw_volatile)
+ .Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("invariant", MIToken::kw_invariant)
+ .Case("align", MIToken::kw_align)
+ .Case("stack", MIToken::kw_stack)
+ .Case("got", MIToken::kw_got)
+ .Case("jump-table", MIToken::kw_jump_table)
+ .Case("constant-pool", MIToken::kw_constant_pool)
+ .Case("call-entry", MIToken::kw_call_entry)
+ .Case("liveout", MIToken::kw_liveout)
+ .Case("address-taken", MIToken::kw_address_taken)
+ .Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("liveins", MIToken::kw_liveins)
+ .Case("successors", MIToken::kw_successors)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
- if (!isalpha(C.peek()) && C.peek() != '_')
+ if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
return None;
auto Range = C;
while (isIdentifierChar(C.peek()))
C.advance();
auto Identifier = Range.upto(C);
- Token = MIToken(getIdentifierKind(Identifier), Identifier);
+ Token.reset(getIdentifierKind(Identifier), Identifier)
+ .setStringValue(Identifier);
return C;
}
static Cursor maybeLexMachineBasicBlock(
Cursor C, MIToken &Token,
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
- if (!C.remaining().startswith("%bb."))
+ bool IsReference = C.remaining().startswith("%bb.");
+ if (!IsReference && !C.remaining().startswith("bb."))
return None;
auto Range = C;
- C.advance(4); // Skip '%bb.'
+ unsigned PrefixLength = IsReference ? 4 : 3;
+ C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
if (!isdigit(C.peek())) {
- Token = MIToken(MIToken::Error, C.remaining());
+ Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(), "expected a number after '%bb.'");
return C;
}
@@ -103,26 +259,103 @@ static Cursor maybeLexMachineBasicBlock(
while (isdigit(C.peek()))
C.advance();
StringRef Number = NumberRange.upto(C);
- unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>'
+ unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
if (C.peek() == '.') {
C.advance(); // Skip '.'
++StringOffset;
while (isIdentifierChar(C.peek()))
C.advance();
}
- Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number),
- StringOffset);
+ Token.reset(IsReference ? MIToken::MachineBasicBlock
+ : MIToken::MachineBasicBlockLabel,
+ Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
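+// Example of the two forms handled above (hypothetical MIR): a definition
+// label such as "bb.1.then" yields a MachineBasicBlockLabel token (id 1,
+// name "then"), while an operand reference such as "%bb.1" yields a
+// MachineBasicBlock token.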
+
+static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = Rule.size() + Number.size();
+ if (C.peek() == '.') {
+ C.advance();
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(Kind, Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
return C;
}
+static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
+}
+
+static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
+}
+
+static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
+}
+
+static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
+}
+
+static Cursor maybeLexIRBlock(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ const StringRef Rule = "%ir-block.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
+ return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
+}
+
+static Cursor maybeLexIRValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ const StringRef Rule = "%ir.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
+ return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
+}
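+// Example of the IR references above (hypothetical MIR): "%ir-block.entry"
+// and "%ir-block.0" refer to named/numbered IR basic blocks, while "%ir.ptr"
+// and "%ir.0" refer to named/numbered IR values.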
+
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
auto Range = C;
C.advance(); // Skip '%'
auto NumberRange = C;
while (isdigit(C.peek()))
C.advance();
- Token = MIToken(MIToken::VirtualRegister, Range.upto(C),
- APSInt(NumberRange.upto(C)));
+ Token.reset(MIToken::VirtualRegister, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
return C;
}
@@ -135,41 +368,112 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
C.advance(); // Skip '%'
while (isIdentifierChar(C.peek()))
C.advance();
- Token = MIToken(MIToken::NamedRegister, Range.upto(C),
- /*StringOffset=*/1); // Drop the '%'
+ Token.reset(MIToken::NamedRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
return C;
}
-static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
+static Cursor maybeLexGlobalValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
if (C.peek() != '@')
return None;
+ if (!isdigit(C.peek(1)))
+ return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
+ ErrorCallback);
auto Range = C;
- C.advance(); // Skip the '@'
- // TODO: add support for quoted names.
- if (!isdigit(C.peek())) {
- while (isIdentifierChar(C.peek()))
- C.advance();
- Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C),
- /*StringOffset=*/1); // Drop the '@'
- return C;
- }
+ C.advance(1); // Skip the '@'
auto NumberRange = C;
while (isdigit(C.peek()))
C.advance();
- Token =
- MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C)));
+ Token.reset(MIToken::GlobalValue, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
return C;
}
-static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) {
+static Cursor maybeLexExternalSymbol(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '$')
+ return None;
+ return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
+ ErrorCallback);
+}
+
+static bool isValidHexFloatingPointPrefix(char C) {
+ return C == 'H' || C == 'K' || C == 'L' || C == 'M';
+}
+
+static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || C.peek(1) != 'x')
+ return None;
+ Cursor Range = C;
+ C.advance(2); // Skip '0x'
+ if (isValidHexFloatingPointPrefix(C.peek()))
+ C.advance();
+ while (isxdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
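+// Example of the hex literals above (hypothetical MIR): "0x3FF0000000000000"
+// is a plain hex double, while a prefixed form such as "0xH3C00" follows the
+// LLVM IR convention (H/K/L/M for half/x86_fp80/fp128/ppc_fp128).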
+
+static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
+ C.advance();
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(C.peek()))
+ C.advance();
+ if ((C.peek() == 'e' || C.peek() == 'E') &&
+ (isdigit(C.peek(1)) ||
+ ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
+ C.advance(2);
+ while (isdigit(C.peek()))
+ C.advance();
+ }
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
return None;
auto Range = C;
C.advance();
while (isdigit(C.peek()))
C.advance();
+ if (C.peek() == '.')
+ return lexFloatingPointLiteral(Range, C, Token);
StringRef StrVal = Range.upto(C);
- Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal));
+ Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
+ return C;
+}
+
+static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("!tbaa", MIToken::md_tbaa)
+ .Case("!alias.scope", MIToken::md_alias_scope)
+ .Case("!noalias", MIToken::md_noalias)
+ .Case("!range", MIToken::md_range)
+ .Default(MIToken::Error);
+}
+
+static Cursor maybeLexExclaim(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '!')
+ return None;
+ auto Range = C;
+ C.advance(1);
+ if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
+ Token.reset(MIToken::exclaim, Range.upto(C));
+ return C;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ Token.reset(getMetadataKeywordKind(StrVal), StrVal);
+ if (Token.isError())
+ ErrorCallback(Token.location(),
+ "use of unknown metadata keyword '" + StrVal + "'");
return C;
}
@@ -181,44 +485,119 @@ static MIToken::TokenKind symbolToken(char C) {
return MIToken::equal;
case ':':
return MIToken::colon;
+ case '(':
+ return MIToken::lparen;
+ case ')':
+ return MIToken::rparen;
+ case '{':
+ return MIToken::lbrace;
+ case '}':
+ return MIToken::rbrace;
+ case '+':
+ return MIToken::plus;
+ case '-':
+ return MIToken::minus;
default:
return MIToken::Error;
}
}
static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
- auto Kind = symbolToken(C.peek());
+ MIToken::TokenKind Kind;
+ unsigned Length = 1;
+ if (C.peek() == ':' && C.peek(1) == ':') {
+ Kind = MIToken::coloncolon;
+ Length = 2;
+ } else
+ Kind = symbolToken(C.peek());
if (Kind == MIToken::Error)
return None;
auto Range = C;
+ C.advance(Length);
+ Token.reset(Kind, Range.upto(C));
+ return C;
+}
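+// Note: the "::" token lexed above separates an instruction's operands from
+// its machine memory operands, e.g. (hypothetical MIR)
+// "... :: (load 4 from %ir.p)".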
+
+static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
+ if (!isNewlineChar(C.peek()))
+ return None;
+ auto Range = C;
+ C.advance();
+ Token.reset(MIToken::Newline, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexEscapedIRValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '`')
+ return None;
+ auto Range = C;
+ C.advance();
+ auto StrRange = C;
+ while (C.peek() != '`') {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '`'");
+ Token.reset(MIToken::Error, Range.remaining());
+ return C;
+ }
+ C.advance();
+ }
+ StringRef Value = StrRange.upto(C);
C.advance();
- Token = MIToken(Kind, Range.upto(C));
+ Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
return C;
}
StringRef llvm::lexMIToken(
StringRef Source, MIToken &Token,
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
- auto C = skipWhitespace(Cursor(Source));
+ auto C = skipComment(skipWhitespace(Cursor(Source)));
if (C.isEOF()) {
- Token = MIToken(MIToken::Eof, C.remaining());
+ Token.reset(MIToken::Eof, C.remaining());
return C.remaining();
}
- if (Cursor R = maybeLexIdentifier(C, Token))
+ if (Cursor R = maybeLexIntegerType(C, Token))
return R.remaining();
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexIdentifier(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexJumpTableIndex(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexFixedStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexConstantPoolItem(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexRegister(C, Token))
return R.remaining();
- if (Cursor R = maybeLexGlobalValue(C, Token))
+ if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
- if (Cursor R = maybeLexIntegerLiteral(C, Token))
+ if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNumericalLiteral(C, Token))
+ return R.remaining();
+  if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexSymbol(C, Token))
return R.remaining();
+ if (Cursor R = maybeLexNewline(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
+ return R.remaining();
- Token = MIToken(MIToken::Error, C.remaining());
+ Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(),
Twine("unexpected character '") + Twine(C.peek()) + "'");
return C.remaining();
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index 55460b5..ff54aa3 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -30,50 +30,119 @@ struct MIToken {
// Markers
Eof,
Error,
+ Newline,
// Tokens with no info.
comma,
equal,
underscore,
colon,
+ coloncolon,
+ exclaim,
+ lparen,
+ rparen,
+ lbrace,
+ rbrace,
+ plus,
+ minus,
// Keywords
kw_implicit,
kw_implicit_define,
+ kw_def,
kw_dead,
kw_killed,
kw_undef,
+ kw_internal,
+ kw_early_clobber,
+ kw_debug_use,
+ kw_tied_def,
+ kw_frame_setup,
+ kw_debug_location,
+ kw_cfi_same_value,
+ kw_cfi_offset,
+ kw_cfi_def_cfa_register,
+ kw_cfi_def_cfa_offset,
+ kw_cfi_def_cfa,
+ kw_blockaddress,
+ kw_target_index,
+ kw_half,
+ kw_float,
+ kw_double,
+ kw_x86_fp80,
+ kw_fp128,
+ kw_ppc_fp128,
+ kw_target_flags,
+ kw_volatile,
+ kw_non_temporal,
+ kw_invariant,
+ kw_align,
+ kw_stack,
+ kw_got,
+ kw_jump_table,
+ kw_constant_pool,
+ kw_call_entry,
+ kw_liveout,
+ kw_address_taken,
+ kw_landing_pad,
+ kw_liveins,
+ kw_successors,
+
+ // Named metadata keywords
+ md_tbaa,
+ md_alias_scope,
+ md_noalias,
+ md_range,
// Identifier tokens
Identifier,
+ IntegerType,
NamedRegister,
+ MachineBasicBlockLabel,
MachineBasicBlock,
+ StackObject,
+ FixedStackObject,
NamedGlobalValue,
GlobalValue,
+ ExternalSymbol,
// Other tokens
IntegerLiteral,
- VirtualRegister
+ FloatingPointLiteral,
+ VirtualRegister,
+ ConstantPoolItem,
+ JumpTableIndex,
+ NamedIRBlock,
+ IRBlock,
+ NamedIRValue,
+ IRValue,
+ QuotedIRValue // `<constant value>`
};
private:
TokenKind Kind;
- unsigned StringOffset;
StringRef Range;
+ StringRef StringValue;
+ std::string StringValueStorage;
APSInt IntVal;
public:
- MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
- : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
+ MIToken() : Kind(Error) {}
- MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
- unsigned StringOffset = 0)
- : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {}
+ MIToken &reset(TokenKind Kind, StringRef Range);
+
+ MIToken &setStringValue(StringRef StrVal);
+ MIToken &setOwnedStringValue(std::string StrVal);
+ MIToken &setIntegerValue(APSInt IntVal);
TokenKind kind() const { return Kind; }
bool isError() const { return Kind == Error; }
+ bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
+
+ bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
+
bool isRegister() const {
return Kind == NamedRegister || Kind == underscore ||
Kind == VirtualRegister;
@@ -81,7 +150,14 @@ public:
bool isRegisterFlag() const {
return Kind == kw_implicit || Kind == kw_implicit_define ||
- Kind == kw_dead || Kind == kw_killed || Kind == kw_undef;
+ Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
+ Kind == kw_undef || Kind == kw_internal ||
+ Kind == kw_early_clobber || Kind == kw_debug_use;
+ }
+
+ bool isMemoryOperandFlag() const {
+ return Kind == kw_volatile || Kind == kw_non_temporal ||
+ Kind == kw_invariant;
}
bool is(TokenKind K) const { return Kind == K; }
@@ -90,13 +166,19 @@ public:
StringRef::iterator location() const { return Range.begin(); }
- StringRef stringValue() const { return Range.drop_front(StringOffset); }
+ StringRef range() const { return Range; }
+
+ /// Return the token's string value.
+ StringRef stringValue() const { return StringValue; }
const APSInt &integerValue() const { return IntVal; }
bool hasIntegerValue() const {
return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
- Kind == GlobalValue || Kind == VirtualRegister;
+ Kind == MachineBasicBlockLabel || Kind == StackObject ||
+ Kind == FixedStackObject || Kind == GlobalValue ||
+ Kind == VirtualRegister || Kind == ConstantPoolItem ||
+ Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
}
};
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index c000112..f2f6584 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -14,12 +14,20 @@
#include "MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -30,15 +38,20 @@ using namespace llvm;
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
-/// range.
-struct MachineOperandWithLocation {
+/// range and other attributes.
+struct ParsedMachineOperand {
MachineOperand Operand;
StringRef::iterator Begin;
StringRef::iterator End;
-
- MachineOperandWithLocation(const MachineOperand &Operand,
- StringRef::iterator Begin, StringRef::iterator End)
- : Operand(Operand), Begin(Begin), End(End) {}
+ Optional<unsigned> TiedDefIdx;
+
+ ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
+ StringRef::iterator End, Optional<unsigned> &TiedDefIdx)
+ : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
+ if (TiedDefIdx)
+ assert(Operand.isReg() && Operand.isUse() &&
+ "Only used register operands can be tied");
+ }
};
class MIParser {
@@ -58,6 +71,16 @@ class MIParser {
StringMap<const uint32_t *> Names2RegMasks;
/// Maps from subregister names to subregister indices.
StringMap<unsigned> Names2SubRegIndices;
+ /// Maps from slot numbers to function's unnamed basic blocks.
+ DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
+ /// Maps from slot numbers to function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values;
+ /// Maps from target index names to target indices.
+ StringMap<int> Names2TargetIndices;
+ /// Maps from direct target flag names to the direct target flag values.
+ StringMap<unsigned> Names2DirectTargetFlags;
+ /// Maps from direct target flag names to the bitmask target flag values.
+ StringMap<unsigned> Names2BitmaskTargetFlags;
public:
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
@@ -76,19 +99,66 @@ public:
/// This function always return true.
bool error(StringRef::iterator Loc, const Twine &Msg);
+ bool
+ parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlocks();
bool parse(MachineInstr *&MI);
- bool parseMBB(MachineBasicBlock *&MBB);
- bool parseNamedRegister(unsigned &Reg);
+ bool parseStandaloneMBB(MachineBasicBlock *&MBB);
+ bool parseStandaloneNamedRegister(unsigned &Reg);
+ bool parseStandaloneVirtualRegister(unsigned &Reg);
+ bool parseStandaloneStackObject(int &FI);
+ bool parseStandaloneMDNode(MDNode *&Node);
+
+ bool
+ parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlock(MachineBasicBlock &MBB);
+ bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
+ bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
bool parseRegister(unsigned &Reg);
bool parseRegisterFlag(unsigned &Flags);
bool parseSubRegisterIndex(unsigned &SubReg);
- bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
+ bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
+ bool parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
+ bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
+ const Constant *&C);
+ bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
+ bool parseTypedImmediateOperand(MachineOperand &Dest);
+ bool parseFPImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
bool parseMBBOperand(MachineOperand &Dest);
+ bool parseStackFrameIndex(int &FI);
+ bool parseStackObjectOperand(MachineOperand &Dest);
+ bool parseFixedStackFrameIndex(int &FI);
+ bool parseFixedStackObjectOperand(MachineOperand &Dest);
+ bool parseGlobalValue(GlobalValue *&GV);
bool parseGlobalAddressOperand(MachineOperand &Dest);
- bool parseMachineOperand(MachineOperand &Dest);
+ bool parseConstantPoolIndexOperand(MachineOperand &Dest);
+ bool parseJumpTableIndexOperand(MachineOperand &Dest);
+ bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMDNode(MDNode *&Node);
+ bool parseMetadataOperand(MachineOperand &Dest);
+ bool parseCFIOffset(int &Offset);
+ bool parseCFIRegister(unsigned &Reg);
+ bool parseCFIOperand(MachineOperand &Dest);
+ bool parseIRBlock(BasicBlock *&BB, const Function &F);
+ bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
+ bool parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseOffset(int64_t &Offset);
+ bool parseAlignment(unsigned &Alignment);
+ bool parseOperandsOffset(MachineOperand &Op);
+ bool parseIRValue(const Value *&V);
+ bool parseMemoryOperandFlag(unsigned &Flags);
+ bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
+ bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -96,15 +166,31 @@ private:
/// Return true if an error occurred.
bool getUnsigned(unsigned &Result);
+  /// Convert the integer literal in the current token into a uint64.
+ ///
+ /// Return true if an error occurred.
+ bool getUint64(uint64_t &Result);
+
+ /// If the current token is of the given kind, consume it and return false.
+ /// Otherwise report an error and return true.
+ bool expectAndConsume(MIToken::TokenKind TokenKind);
+
+ /// If the current token is of the given kind, consume it and return true.
+ /// Otherwise return false.
+ bool consumeIfPresent(MIToken::TokenKind TokenKind);
+
void initNames2InstrOpCodes();
/// Try to convert an instruction name to an opcode. Return true if the
/// instruction name is invalid.
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
- bool parseInstruction(unsigned &OpCode);
+ bool parseInstruction(unsigned &OpCode, unsigned &Flags);
+
+ bool assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands);
- bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands,
+ bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
const MCInstrDesc &MCID);
void initNames2Regs();
@@ -126,6 +212,34 @@ private:
///
/// Return 0 if the name isn't a subregister index class.
unsigned getSubRegIndex(StringRef Name);
+
+ const BasicBlock *getIRBlock(unsigned Slot);
+ const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
+
+ const Value *getIRValue(unsigned Slot);
+
+ void initNames2TargetIndices();
+
+  /// Try to convert a name of a target index to the corresponding target index.
+ ///
+ /// Return true if the name isn't a name of a target index.
+ bool getTargetIndex(StringRef Name, int &Index);
+
+ void initNames2DirectTargetFlags();
+
+ /// Try to convert a name of a direct target flag to the corresponding
+ /// target flag.
+ ///
+  /// Return true if the name isn't a name of a direct target flag.
+ bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
+
+ void initNames2BitmaskTargetFlags();
+
+ /// Try to convert a name of a bitmask target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a bitmask target flag.
+ bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
};
} // end anonymous namespace
@@ -134,7 +248,7 @@ MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
StringRef Source, const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots)
: SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
- Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {}
+ PFS(PFS), IRSlots(IRSlots) {}
void MIParser::lex() {
CurrentSource = lexMIToken(
@@ -146,49 +260,378 @@ bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
- Error = SMDiagnostic(
- SM, SMLoc(),
- SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
- Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
+ const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID());
+ if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) {
+ // Create an ordinary diagnostic when the source manager's buffer is the
+ // source string.
+ Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
+ return true;
+ }
+ // Create a diagnostic for a YAML string literal.
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+ Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
+ Source, None, None);
return true;
}
-bool MIParser::parse(MachineInstr *&MI) {
+static const char *toString(MIToken::TokenKind TokenKind) {
+ switch (TokenKind) {
+ case MIToken::comma:
+ return "','";
+ case MIToken::equal:
+ return "'='";
+ case MIToken::colon:
+ return "':'";
+ case MIToken::lparen:
+ return "'('";
+ case MIToken::rparen:
+ return "')'";
+ default:
+ return "<unknown token>";
+ }
+}
+
+bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return error(Twine("expected ") + toString(TokenKind));
+ lex();
+ return false;
+}
+
+bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return false;
+ lex();
+ return true;
+}
+
+bool MIParser::parseBasicBlockDefinition(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ auto Loc = Token.location();
+ auto Name = Token.stringValue();
+ lex();
+ bool HasAddressTaken = false;
+ bool IsLandingPad = false;
+ unsigned Alignment = 0;
+ BasicBlock *BB = nullptr;
+ if (consumeIfPresent(MIToken::lparen)) {
+ do {
+ // TODO: Report an error when multiple same attributes are specified.
+ switch (Token.kind()) {
+ case MIToken::kw_address_taken:
+ HasAddressTaken = true;
+ lex();
+ break;
+ case MIToken::kw_landing_pad:
+ IsLandingPad = true;
+ lex();
+ break;
+ case MIToken::kw_align:
+ if (parseAlignment(Alignment))
+ return true;
+ break;
+ case MIToken::IRBlock:
+ // TODO: Report an error when both name and ir block are specified.
+ if (parseIRBlock(BB, *MF.getFunction()))
+ return true;
+ lex();
+ break;
+ default:
+ break;
+ }
+ } while (consumeIfPresent(MIToken::comma));
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ if (expectAndConsume(MIToken::colon))
+ return true;
+
+ if (!Name.empty()) {
+ BB = dyn_cast_or_null<BasicBlock>(
+ MF.getFunction()->getValueSymbolTable().lookup(Name));
+ if (!BB)
+ return error(Loc, Twine("basic block '") + Name +
+ "' is not defined in the function '" +
+ MF.getName() + "'");
+ }
+ auto *MBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(MF.end(), MBB);
+ bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second;
+ if (!WasInserted)
+ return error(Loc, Twine("redefinition of machine basic block with id #") +
+ Twine(ID));
+ if (Alignment)
+ MBB->setAlignment(Alignment);
+ if (HasAddressTaken)
+ MBB->setHasAddressTaken();
+ MBB->setIsEHPad(IsLandingPad);
+ return false;
+}
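+// Example of a block definition accepted above (hypothetical MIR):
+//   bb.1.handler (landing-pad, align 4):
+// which binds the MBB to IR block "handler", marks it as an EH pad and
+// records the requested alignment.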
+
+bool MIParser::parseBasicBlockDefinitions(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ if (Token.isNot(MIToken::MachineBasicBlockLabel))
+ return error("expected a basic block definition before instructions");
+ unsigned BraceDepth = 0;
+ do {
+ if (parseBasicBlockDefinition(MBBSlots))
+ return true;
+ bool IsAfterNewline = false;
+ // Skip until the next machine basic block.
+ while (true) {
+ if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+ Token.isErrorOrEOF())
+ break;
+ else if (Token.is(MIToken::MachineBasicBlockLabel))
+ return error("basic block definition should be located at the start of "
+ "the line");
+ else if (consumeIfPresent(MIToken::Newline)) {
+ IsAfterNewline = true;
+ continue;
+ }
+ IsAfterNewline = false;
+ if (Token.is(MIToken::lbrace))
+ ++BraceDepth;
+ if (Token.is(MIToken::rbrace)) {
+ if (!BraceDepth)
+ return error("extraneous closing brace ('}')");
+ --BraceDepth;
+ }
+ lex();
+ }
+ // Verify that we closed all of the '{' at the end of a file or a block.
+ if (!Token.isError() && BraceDepth)
+ return error("expected '}'"); // FIXME: Report a note that shows '{'.
+ } while (!Token.isErrorOrEOF());
+ return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_liveins));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+ return false;
+ do {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ MBB.addLiveIn(Reg);
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
+
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_successors));
lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+ return false;
+ do {
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ MachineBasicBlock *SuccMBB = nullptr;
+ if (parseMBBReference(SuccMBB))
+ return true;
+ lex();
+ unsigned Weight = 0;
+ if (consumeIfPresent(MIToken::lparen)) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '('");
+ if (getUnsigned(Weight))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+ } while (consumeIfPresent(MIToken::comma));
+ MBB.normalizeSuccProbs();
+ return false;
+}
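+// Example of a successor list parsed above (hypothetical MIR):
+//   successors: %bb.2(16), %bb.3(16)
+// The optional parenthesized integers are raw weights that are passed to
+// BranchProbability::getRaw and then normalized.
+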
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+ // Skip the definition.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ lex();
+ if (consumeIfPresent(MIToken::lparen)) {
+ while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+ lex();
+ consumeIfPresent(MIToken::rparen);
+ }
+ consumeIfPresent(MIToken::colon);
+
+ // Parse the liveins and successors.
+  // N.B.: Multiple lists of successors and liveins are allowed and they're
+ // merged into one.
+ // Example:
+ // liveins: %edi
+ // liveins: %esi
+ //
+ // is equivalent to
+ // liveins: %edi, %esi
+ while (true) {
+ if (Token.is(MIToken::kw_successors)) {
+ if (parseBasicBlockSuccessors(MBB))
+ return true;
+ } else if (Token.is(MIToken::kw_liveins)) {
+ if (parseBasicBlockLiveins(MBB))
+ return true;
+ } else if (consumeIfPresent(MIToken::Newline)) {
+ continue;
+ } else
+ break;
+ if (!Token.isNewlineOrEOF())
+ return error("expected line break at the end of a list");
+ lex();
+ }
+
+ // Parse the instructions.
+ bool IsInBundle = false;
+ MachineInstr *PrevMI = nullptr;
+ while (true) {
+ if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
+ return false;
+ else if (consumeIfPresent(MIToken::Newline))
+ continue;
+ if (consumeIfPresent(MIToken::rbrace)) {
+ // The first parsing pass should verify that all closing '}' have an
+ // opening '{'.
+ assert(IsInBundle);
+ IsInBundle = false;
+ continue;
+ }
+ MachineInstr *MI = nullptr;
+ if (parse(MI))
+ return true;
+ MBB.insert(MBB.end(), MI);
+ if (IsInBundle) {
+ PrevMI->setFlag(MachineInstr::BundledSucc);
+ MI->setFlag(MachineInstr::BundledPred);
+ }
+ PrevMI = MI;
+ if (Token.is(MIToken::lbrace)) {
+ if (IsInBundle)
+ return error("nested instruction bundles are not allowed");
+ lex();
+ // This instruction is the start of the bundle.
+ MI->setFlag(MachineInstr::BundledSucc);
+ IsInBundle = true;
+ if (!Token.is(MIToken::Newline))
+ // The next instruction can be on the same line.
+ continue;
+ }
+ assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+ lex();
+ }
+ return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ // The first parsing pass should have verified that this token is a MBB label
+ // in the 'parseBasicBlockDefinitions' method.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ do {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB))
+ return true;
+ if (parseBasicBlock(*MBB))
+ return true;
+ // The method 'parseBasicBlock' should parse the whole block until the next
+ // block or the end of file.
+ assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+ } while (Token.isNot(MIToken::Eof));
+ return false;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
// Parse any register operands before '='
- // TODO: Allow parsing of multiple operands before '='
MachineOperand MO = MachineOperand::CreateImm(0);
- SmallVector<MachineOperandWithLocation, 8> Operands;
- if (Token.isRegister() || Token.isRegisterFlag()) {
+ SmallVector<ParsedMachineOperand, 8> Operands;
+ while (Token.isRegister() || Token.isRegisterFlag()) {
auto Loc = Token.location();
- if (parseRegisterOperand(MO, /*IsDef=*/true))
+ Optional<unsigned> TiedDefIdx;
+ if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
return true;
- Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
- if (Token.isNot(MIToken::equal))
- return error("expected '='");
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNot(MIToken::comma))
+ break;
lex();
}
-
- unsigned OpCode;
- if (Token.isError() || parseInstruction(OpCode))
+ if (!Operands.empty() && expectAndConsume(MIToken::equal))
return true;
- // TODO: Parse the instruction flags and memory operands.
+ unsigned OpCode, Flags = 0;
+ if (Token.isError() || parseInstruction(OpCode, Flags))
+ return true;
// Parse the remaining machine operands.
- while (Token.isNot(MIToken::Eof)) {
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
- if (parseMachineOperand(MO))
+ Optional<unsigned> TiedDefIdx;
+ if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
return true;
- Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
- if (Token.is(MIToken::Eof))
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
break;
if (Token.isNot(MIToken::comma))
return error("expected ',' before the next machine operand");
lex();
}
+ DebugLoc DebugLocation;
+ if (Token.is(MIToken::kw_debug_location)) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node after 'debug-location'");
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ DebugLocation = DebugLoc(Node);
+ }
+
+ // Parse the machine memory operands.
+ SmallVector<MachineMemOperand *, 2> MemOperands;
+ if (Token.is(MIToken::coloncolon)) {
+ lex();
+ while (!Token.isNewlineOrEOF()) {
+ MachineMemOperand *MemOp = nullptr;
+ if (parseMachineMemoryOperand(MemOp))
+ return true;
+ MemOperands.push_back(MemOp);
+ if (Token.isNewlineOrEOF())
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine memory operand");
+ lex();
+ }
+ }
+
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
if (!MCID.isVariadic()) {
// FIXME: Move the implicit operand verification to the machine verifier.
@@ -197,13 +640,22 @@ bool MIParser::parse(MachineInstr *&MI) {
}
// TODO: Check for extraneous machine operands.
- MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+ MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+ MI->setFlags(Flags);
for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
+ if (assignRegisterTies(*MI, Operands))
+ return true;
+ if (MemOperands.empty())
+ return false;
+ MachineInstr::mmo_iterator MemRefs =
+ MF.allocateMemRefsArray(MemOperands.size());
+ std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
+ MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
return false;
}
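+// Example of a complete instruction accepted by the parser above
+// (hypothetical MIR, X86 operand names assumed):
+//   %eax = MOV32rm %rdi, 1, _, 0, _, debug-location !12 :: (load 4 from %ir.p)
+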
-bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
lex();
if (Token.isNot(MIToken::MachineBasicBlock))
return error("expected a machine basic block reference");
@@ -216,18 +668,52 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
return false;
}
-bool MIParser::parseNamedRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
if (parseRegister(Reg))
- return 0;
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::VirtualRegister))
+ return error("expected a virtual register");
+ if (parseRegister(Reg))
+ return true;
lex();
if (Token.isNot(MIToken::Eof))
return error("expected end of string after the register reference");
return false;
}
+bool MIParser::parseStandaloneStackObject(int &FI) {
+ lex();
+ if (Token.isNot(MIToken::StackObject))
+ return error("expected a stack object");
+ if (parseStackFrameIndex(FI))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the stack object reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+ if (parseMDNode(Node))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the metadata node");
+ return false;
+}
+
static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
assert(MO.isImplicit());
return MO.isDef() ? "implicit-def" : "implicit";
@@ -239,8 +725,18 @@ static std::string getRegisterName(const TargetRegisterInfo *TRI,
return StringRef(TRI->getName(Reg)).lower();
}
-bool MIParser::verifyImplicitOperands(
- ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) {
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ for (const auto &I : Operands) {
+ if (ImplicitOperand.isIdenticalTo(I.Operand))
+ return true;
+ }
+ return false;
+}
+
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID) {
if (MCID.isCall())
// We can't verify call instructions as they can contain arbitrary implicit
// register and register mask operands.
@@ -249,48 +745,32 @@ bool MIParser::verifyImplicitOperands(
// Gather all the expected implicit operands.
SmallVector<MachineOperand, 4> ImplicitOperands;
if (MCID.ImplicitDefs)
- for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
ImplicitOperands.push_back(
MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID.ImplicitUses)
- for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
ImplicitOperands.push_back(
MachineOperand::CreateReg(*ImpUses, false, true));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
- size_t I = ImplicitOperands.size(), J = Operands.size();
- while (I) {
- --I;
- if (J) {
- --J;
- const auto &ImplicitOperand = ImplicitOperands[I];
- const auto &Operand = Operands[J].Operand;
- if (ImplicitOperand.isIdenticalTo(Operand))
- continue;
- if (Operand.isReg() && Operand.isImplicit()) {
- return error(Operands[J].Begin,
- Twine("expected an implicit register operand '") +
- printImplicitRegisterFlag(ImplicitOperand) + " %" +
- getRegisterName(TRI, ImplicitOperand.getReg()) + "'");
- }
- }
- // TODO: Fix source location when Operands[J].end is right before '=', i.e:
- // insead of reporting an error at this location:
- // %eax = MOV32r0
- // ^
- // report the error at the following location:
- // %eax = MOV32r0
- // ^
- return error(J < Operands.size() ? Operands[J].End : Token.location(),
+ for (const auto &I : ImplicitOperands) {
+ if (isImplicitOperandIn(I, Operands))
+ continue;
+ return error(Operands.empty() ? Token.location() : Operands.back().End,
Twine("missing implicit register operand '") +
- printImplicitRegisterFlag(ImplicitOperands[I]) + " %" +
- getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'");
+ printImplicitRegisterFlag(I) + " %" +
+ getRegisterName(TRI, I.getReg()) + "'");
}
return false;
}
-bool MIParser::parseInstruction(unsigned &OpCode) {
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+ if (Token.is(MIToken::kw_frame_setup)) {
+ Flags |= MachineInstr::FrameSetup;
+ lex();
+ }
if (Token.isNot(MIToken::Identifier))
return error("expected a machine instruction");
StringRef InstrName = Token.stringValue();
@@ -330,6 +810,7 @@ bool MIParser::parseRegister(unsigned &Reg) {
}
bool MIParser::parseRegisterFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
switch (Token.kind()) {
case MIToken::kw_implicit:
Flags |= RegState::Implicit;
@@ -337,6 +818,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
case MIToken::kw_implicit_define:
Flags |= RegState::ImplicitDefine;
break;
+ case MIToken::kw_def:
+ Flags |= RegState::Define;
+ break;
case MIToken::kw_dead:
Flags |= RegState::Dead;
break;
@@ -346,11 +830,22 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
case MIToken::kw_undef:
Flags |= RegState::Undef;
break;
- // TODO: report an error when we specify the same flag more than once.
- // TODO: parse the other register flags.
+ case MIToken::kw_internal:
+ Flags |= RegState::InternalRead;
+ break;
+ case MIToken::kw_early_clobber:
+ Flags |= RegState::EarlyClobber;
+ break;
+ case MIToken::kw_debug_use:
+ Flags |= RegState::Debug;
+ break;
default:
llvm_unreachable("The current token should be a register flag");
}
+ if (OldFlags == Flags)
+    // If OR-ing in the flag did not change Flags, that flag bit was already
+    // set, i.e. the same flag was specified more than once.
+ return error("duplicate '" + Token.stringValue() + "' register flag");
lex();
return false;
}
@@ -368,7 +863,59 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
return false;
}
-bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+ if (!consumeIfPresent(MIToken::kw_tied_def))
+ return error("expected 'tied-def' after '('");
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'tied-def'");
+ if (getUnsigned(TiedDefIdx))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
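+// Example of the tied-def syntax parsed above (hypothetical MIR): a use such
+// as "%1(tied-def 0)" ties that register use to operand #0 of the same
+// instruction, which must be a register def.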
+
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ if (!Operands[I].TiedDefIdx)
+ continue;
+ // The parser ensures that this operand is a register use, so we just have
+ // to check the tied-def operand.
+ unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+ if (DefIdx >= E)
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '" +
+ Twine(DefIdx) + "'; instruction has only ") +
+ Twine(E) + " operands");
+ const auto &DefOperand = Operands[DefIdx].Operand;
+ if (!DefOperand.isReg() || !DefOperand.isDef())
+ // FIXME: add note with the def operand.
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '") +
+ Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+ " isn't a defined register");
+ // Check that the tied-def operand wasn't tied elsewhere.
+ for (const auto &TiedPair : TiedRegisterPairs) {
+ if (TiedPair.first == DefIdx)
+ return error(Operands[I].Begin,
+ Twine("the tied-def operand #") + Twine(DefIdx) +
+ " is already tied with another register operand");
+ }
+ TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+ }
+  // FIXME: For non-INLINEASM instructions, verify that the tied def and use
+  // operand indices are less than the tied-operand limit.
+ for (const auto &TiedPair : TiedRegisterPairs)
+ MI.tieOperands(TiedPair.first, TiedPair.second);
+ return false;
+}
+
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx,
+ bool IsDef) {
unsigned Reg;
unsigned Flags = IsDef ? RegState::Define : 0;
while (Token.isRegisterFlag()) {
@@ -385,10 +932,17 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
if (parseSubRegisterIndex(SubReg))
return true;
}
+ if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) {
+ unsigned Idx;
+ if (parseRegisterTiedDefIndex(Idx))
+ return true;
+ TiedDefIdx = Idx;
+ }
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
- /*isEarlyClobber=*/false, SubReg);
+ Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+ Flags & RegState::InternalRead);
return false;
}
@@ -396,13 +950,55 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::IntegerLiteral));
const APSInt &Int = Token.integerValue();
if (Int.getMinSignedBits() > 64)
- // TODO: Replace this with an error when we can parse CIMM Machine Operands.
- llvm_unreachable("Can't parse large integer literals yet!");
+ return error("integer literal is too large to be an immediate operand");
Dest = MachineOperand::CreateImm(Int.getExtValue());
lex();
return false;
}
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ auto Source = StringValue.str(); // The source has to be null terminated.
+ SMDiagnostic Err;
+ C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+ &IRSlots);
+ if (!C)
+ return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+ if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::IntegerType));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+ return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::FloatingPointLiteral))
+ return error("expected a floating point literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+ return false;
+}
+
bool MIParser::getUnsigned(unsigned &Result) {
assert(Token.hasIntegerValue() && "Expected a token with an integer value");
const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
@@ -414,7 +1010,8 @@ bool MIParser::getUnsigned(unsigned &Result) {
}
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
- assert(Token.is(MIToken::MachineBasicBlock));
+ assert(Token.is(MIToken::MachineBasicBlock) ||
+ Token.is(MIToken::MachineBasicBlockLabel));
unsigned Number;
if (getUnsigned(Number))
return true;
@@ -438,16 +1035,66 @@ bool MIParser::parseMBBOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+bool MIParser::parseStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::StackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.StackObjectSlots.end())
+ return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+ "'");
+ StringRef Name;
+ if (const auto *Alloca =
+ MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+ Name = Alloca->getName();
+ if (!Token.stringValue().empty() && Token.stringValue() != Name)
+ return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+ "' isn't '" + Token.stringValue() + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::FixedStackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+ return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+ Twine(ID) + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
switch (Token.kind()) {
case MIToken::NamedGlobalValue: {
- auto Name = Token.stringValue();
const Module *M = MF.getFunction()->getParent();
- if (const auto *GV = M->getNamedValue(Name)) {
- Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
- break;
- }
- return error(Twine("use of undefined global value '@") + Name + "'");
+ GV = M->getNamedValue(Token.stringValue());
+ if (!GV)
+ return error(Twine("use of undefined global value '") + Token.range() +
+ "'");
+ break;
}
case MIToken::GlobalValue: {
unsigned GVIdx;
@@ -456,36 +1103,323 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
if (GVIdx >= IRSlots.GlobalValues.size())
return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
"'");
- Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx],
- /*Offset=*/0);
+ GV = IRSlots.GlobalValues[GVIdx];
break;
}
default:
llvm_unreachable("The current token should be a global value");
}
- // TODO: Parse offset and target flags.
+ return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ lex();
+ Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ConstantPoolItem));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+ if (ConstantInfo == PFS.ConstantPoolSlots.end())
+ return error("use of undefined constant '%const." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::JumpTableIndex));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+ if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+ return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+ return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ExternalSymbol));
+ const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateES(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+ assert(Token.is(MIToken::exclaim));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto NodeInfo = IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo == IRSlots.MetadataNodes.end())
+ return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ lex();
+ Node = NodeInfo->second.get();
+ return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ Dest = MachineOperand::CreateMetadata(Node);
+ return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi offset");
+ if (Token.integerValue().getMinSignedBits() > 32)
+ return error("expected a 32 bit integer (the cfi offset is too large)");
+ Offset = (int)Token.integerValue().getExtValue();
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIRegister(unsigned &Reg) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a cfi register");
+ unsigned LLVMReg;
+ if (parseRegister(LLVMReg))
+ return true;
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+ if (DwarfReg < 0)
+ return error("invalid DWARF register");
+ Reg = (unsigned)DwarfReg;
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+ auto Kind = Token.kind();
+ lex();
+ auto &MMI = MF.getMMI();
+ int Offset;
+ unsigned Reg;
+ unsigned CFIIndex;
+ switch (Kind) {
+ case MIToken::kw_cfi_same_value:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa_register:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_def_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfa negates the offset.
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ break;
+ default:
+ // TODO: Parse the other CFI operands.
+ llvm_unreachable("The current token should be a cfi operand");
+ }
+ Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+ return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRBlock: {
+ BB = dyn_cast_or_null<BasicBlock>(
+ F.getValueSymbolTable().lookup(Token.stringValue()));
+ if (!BB)
+ return error(Twine("use of undefined IR block '") + Token.range() + "'");
+ break;
+ }
+ case MIToken::IRBlock: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+ if (!BB)
+ return error(Twine("use of undefined IR block '%ir-block.") +
+ Twine(SlotNumber) + "'");
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ return false;
+}
+
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_blockaddress));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected a global value");
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ auto *F = dyn_cast<Function>(GV);
+ if (!F)
+ return error("expected an IR function reference");
+ lex();
+ if (expectAndConsume(MIToken::comma))
+ return true;
+ BasicBlock *BB = nullptr;
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected an IR block reference");
+ if (parseIRBlock(BB, *F))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_target_index));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target index");
+ int Index = 0;
+ if (getTargetIndex(Token.stringValue(), Index))
+ return error("use of undefined target index '" + Token.stringValue() + "'");
lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_liveout));
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ lex();
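+ // The liveout mask packs one bit per register: word Reg / 32, bit Reg % 32.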
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegLiveOut(Mask);
return false;
}
-bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+bool MIParser::parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx) {
switch (Token.kind()) {
case MIToken::kw_implicit:
case MIToken::kw_implicit_define:
+ case MIToken::kw_def:
case MIToken::kw_dead:
case MIToken::kw_killed:
case MIToken::kw_undef:
+ case MIToken::kw_internal:
+ case MIToken::kw_early_clobber:
+ case MIToken::kw_debug_use:
case MIToken::underscore:
case MIToken::NamedRegister:
case MIToken::VirtualRegister:
- return parseRegisterOperand(Dest);
+ return parseRegisterOperand(Dest, TiedDefIdx);
case MIToken::IntegerLiteral:
return parseImmediateOperand(Dest);
+ case MIToken::IntegerType:
+ return parseTypedImmediateOperand(Dest);
+ case MIToken::kw_half:
+ case MIToken::kw_float:
+ case MIToken::kw_double:
+ case MIToken::kw_x86_fp80:
+ case MIToken::kw_fp128:
+ case MIToken::kw_ppc_fp128:
+ return parseFPImmediateOperand(Dest);
case MIToken::MachineBasicBlock:
return parseMBBOperand(Dest);
+ case MIToken::StackObject:
+ return parseStackObjectOperand(Dest);
+ case MIToken::FixedStackObject:
+ return parseFixedStackObjectOperand(Dest);
case MIToken::GlobalValue:
case MIToken::NamedGlobalValue:
return parseGlobalAddressOperand(Dest);
+ case MIToken::ConstantPoolItem:
+ return parseConstantPoolIndexOperand(Dest);
+ case MIToken::JumpTableIndex:
+ return parseJumpTableIndexOperand(Dest);
+ case MIToken::ExternalSymbol:
+ return parseExternalSymbolOperand(Dest);
+ case MIToken::exclaim:
+ return parseMetadataOperand(Dest);
+ case MIToken::kw_cfi_same_value:
+ case MIToken::kw_cfi_offset:
+ case MIToken::kw_cfi_def_cfa_register:
+ case MIToken::kw_cfi_def_cfa_offset:
+ case MIToken::kw_cfi_def_cfa:
+ return parseCFIOperand(Dest);
+ case MIToken::kw_blockaddress:
+ return parseBlockAddressOperand(Dest);
+ case MIToken::kw_target_index:
+ return parseTargetIndexOperand(Dest);
+ case MIToken::kw_liveout:
+ return parseLiveoutRegisterMaskOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
@@ -496,12 +1430,314 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest) {
}
// fallthrough
default:
- // TODO: parse the other machine operands.
+ // FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
}
return false;
}
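+// Grammar handled below (derived from the code): an operand may be prefixed by
+// 'target-flags(<flag>[, <bitmask-flag>...])', where the first name may be a
+// direct or a bitmask flag. Flag names come from the target's serializable
+// flag tables, and register operands may not carry target flags.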
+bool MIParser::parseMachineOperandAndTargetFlags(
+ MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+ unsigned TF = 0;
+ bool HasTargetFlags = false;
+ if (Token.is(MIToken::kw_target_flags)) {
+ HasTargetFlags = true;
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ if (getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (getBitmaskTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ }
+ lex();
+ while (Token.is(MIToken::comma)) {
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ unsigned BitFlag = 0;
+ if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ // TODO: Report an error when using a duplicate bit target flag.
+ TF |= BitFlag;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ auto Loc = Token.location();
+ if (parseMachineOperand(Dest, TiedDefIdx))
+ return true;
+ if (!HasTargetFlags)
+ return false;
+ if (Dest.isReg())
+ return error(Loc, "register operands can't have target flags");
+ Dest.setTargetFlags(TF);
+ return false;
+}
+
+bool MIParser::parseOffset(int64_t &Offset) {
+ if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+ return false;
+ StringRef Sign = Token.range();
+ bool IsNegative = Token.is(MIToken::minus);
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '" + Sign + "'");
+ if (Token.integerValue().getMinSignedBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Offset = Token.integerValue().getExtValue();
+ if (IsNegative)
+ Offset = -Offset;
+ lex();
+ return false;
+}
+
+bool MIParser::parseAlignment(unsigned &Alignment) {
+ assert(Token.is(MIToken::kw_align));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'align'");
+ if (getUnsigned(Alignment))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Op.setOffset(Offset);
+ return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRValue: {
+ V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+ break;
+ }
+ case MIToken::IRValue: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ V = getIRValue(SlotNumber);
+ break;
+ }
+ case MIToken::NamedGlobalValue:
+ case MIToken::GlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ V = GV;
+ break;
+ }
+ case MIToken::QuotedIRValue: {
+ const Constant *C = nullptr;
+ if (parseIRConstant(Token.location(), Token.stringValue(), C))
+ return true;
+ V = C;
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR value reference");
+ }
+ if (!V)
+ return error(Twine("use of undefined IR value '") + Token.range() + "'");
+ return false;
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+ assert(Token.hasIntegerValue());
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+}
+
+bool MIParser::parseMemoryOperandFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_volatile:
+ Flags |= MachineMemOperand::MOVolatile;
+ break;
+ case MIToken::kw_non_temporal:
+ Flags |= MachineMemOperand::MONonTemporal;
+ break;
+ case MIToken::kw_invariant:
+ Flags |= MachineMemOperand::MOInvariant;
+ break;
+ // TODO: Parse the target-specific memory operand flags.
+ default:
+ llvm_unreachable("The current token should be a memory operand flag");
+ }
+ if (OldFlags == Flags)
+ // If the flags haven't changed, the same flag must have been specified more
+ // than once.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
+ switch (Token.kind()) {
+ case MIToken::kw_stack:
+ PSV = MF.getPSVManager().getStack();
+ break;
+ case MIToken::kw_got:
+ PSV = MF.getPSVManager().getGOT();
+ break;
+ case MIToken::kw_jump_table:
+ PSV = MF.getPSVManager().getJumpTable();
+ break;
+ case MIToken::kw_constant_pool:
+ PSV = MF.getPSVManager().getConstantPool();
+ break;
+ case MIToken::FixedStackObject: {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::kw_call_entry: {
+ lex();
+ switch (Token.kind()) {
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ PSV = MF.getPSVManager().getGlobalValueCallEntry(GV);
+ break;
+ }
+ case MIToken::ExternalSymbol:
+ PSV = MF.getPSVManager().getExternalSymbolCallEntry(
+ MF.createExternalSymbolName(Token.stringValue()));
+ break;
+ default:
+ return error(
+ "expected a global value or an external symbol after 'call-entry'");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be a pseudo source value");
+ }
+ lex();
+ return false;
+}
+
+bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
+ if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
+ Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
+ Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) {
+ const PseudoSourceValue *PSV = nullptr;
+ if (parseMemoryPseudoSourceValue(PSV))
+ return true;
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(PSV, Offset);
+ return false;
+ }
+ if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) &&
+ Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue) &&
+ Token.isNot(MIToken::QuotedIRValue))
+ return error("expected an IR value reference");
+ const Value *V = nullptr;
+ if (parseIRValue(V))
+ return true;
+ if (!V->getType()->isPointerTy())
+ return error("expected a pointer IR value");
+ lex();
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(V, Offset);
+ return false;
+}
+
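+// Illustrative note (assumption, not from the patch): the machine memory
+// operand syntax handled below looks roughly like
+//   (volatile load 4 from %ir.ptr, align 4, !tbaa !2)
+// i.e. optional flags, 'load' or 'store', the size, 'from'/'into', a pointer,
+// and optional 'align'/metadata attachments.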
+bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ unsigned Flags = 0;
+ while (Token.isMemoryOperandFlag()) {
+ if (parseMemoryOperandFlag(Flags))
+ return true;
+ }
+ if (Token.isNot(MIToken::Identifier) ||
+ (Token.stringValue() != "load" && Token.stringValue() != "store"))
+ return error("expected 'load' or 'store' memory operation");
+ if (Token.stringValue() == "load")
+ Flags |= MachineMemOperand::MOLoad;
+ else
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected the size integer literal after memory operation");
+ uint64_t Size;
+ if (getUint64(Size))
+ return true;
+ lex();
+
+ const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word)
+ return error(Twine("expected '") + Word + "'");
+ lex();
+
+ MachinePointerInfo Ptr = MachinePointerInfo();
+ if (parseMachinePointerInfo(Ptr))
+ return true;
+ unsigned BaseAlignment = Size;
+ AAMDNodes AAInfo;
+ MDNode *Range = nullptr;
+ while (consumeIfPresent(MIToken::comma)) {
+ switch (Token.kind()) {
+ case MIToken::kw_align:
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::md_tbaa:
+ lex();
+ if (parseMDNode(AAInfo.TBAA))
+ return true;
+ break;
+ case MIToken::md_alias_scope:
+ lex();
+ if (parseMDNode(AAInfo.Scope))
+ return true;
+ break;
+ case MIToken::md_noalias:
+ lex();
+ if (parseMDNode(AAInfo.NoAlias))
+ return true;
+ break;
+ case MIToken::md_range:
+ lex();
+ if (parseMDNode(Range))
+ return true;
+ break;
+ // TODO: Report an error on duplicate metadata nodes.
+ default:
+ return error("expected 'align' or '!tbaa' or '!alias.scope' or "
+ "'!noalias' or '!range'");
+ }
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest =
+ MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ return false;
+}
+
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
@@ -583,18 +1819,162 @@ unsigned MIParser::getSubRegIndex(StringRef Name) {
return SubRegInfo->getValue();
}
-bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI);
+static void initSlots2BasicBlocks(
+ const Function &F,
+ DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (auto &BB : F) {
+ if (BB.hasName())
+ continue;
+ int Slot = MST.getLocalSlot(&BB);
+ if (Slot == -1)
+ continue;
+ Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB));
+ }
+}
+
+static const BasicBlock *getIRBlockFromSlot(
+ unsigned Slot,
+ const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ auto BlockInfo = Slots2BasicBlocks.find(Slot);
+ if (BlockInfo == Slots2BasicBlocks.end())
+ return nullptr;
+ return BlockInfo->second;
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
+ if (Slots2BasicBlocks.empty())
+ initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
+ if (&F == MF.getFunction())
+ return getIRBlock(Slot);
+ DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
+ initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
+}
+
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to the function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value *MIParser::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(*MF.getFunction(), Slots2Values);
+ auto ValueInfo = Slots2Values.find(Slot);
+ if (ValueInfo == Slots2Values.end())
+ return nullptr;
+ return ValueInfo->second;
+}
+
+void MIParser::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void MIParser::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void MIParser::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+ PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks();
}
bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
MachineFunction &MF, StringRef Src,
const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB);
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB);
}
bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
@@ -602,5 +1982,30 @@ bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots,
SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg);
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneVirtualRegister(Reg);
+}
+
+bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src, const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node);
}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index fca4c4e..8aef704 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -19,9 +19,11 @@
namespace llvm {
+class BasicBlock;
class MachineBasicBlock;
class MachineInstr;
class MachineFunction;
+class MDNode;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
@@ -29,11 +31,42 @@ class SourceMgr;
struct PerFunctionMIParsingState {
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+ DenseMap<unsigned, int> FixedStackObjectSlots;
+ DenseMap<unsigned, int> StackObjectSlots;
+ DenseMap<unsigned, unsigned> ConstantPoolSlots;
+ DenseMap<unsigned, unsigned> JumpTableSlots;
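+ // The stack object, constant pool and jump table slots are filled in by the
+ // MIRParserImpl initializeFrameInfo, initializeConstantPool and
+ // initializeJumpTableInfo methods and consumed by MIParser when resolving
+ // '%stack.N', '%fixed-stack.N', '%const.N' and '%jump-table.N' references.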
};
-bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
- StringRef Src, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet, which makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+ PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
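+//
+// Illustrative usage (mirroring MIRParserImpl::initializeMachineFunction in
+// this patch): the two passes are meant to run back to back over the same
+// function body, e.g.
+//   PerFunctionMIParsingState PFS;
+//   SMDiagnostic Error;
+//   if (parseMachineBasicBlockDefinitions(MF, Body, PFS, IRSlots, Error) ||
+//       parseMachineInstructions(MF, Body, PFS, IRSlots, Error))
+//     /* report Error */;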
bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
MachineFunction &MF, StringRef Src,
@@ -46,6 +79,21 @@ bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
const SlotMapping &IRSlots,
SMDiagnostic &Error);
+bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error);
+
+bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src, const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 16b0e16..422efbc 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -20,8 +20,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
@@ -95,30 +97,53 @@ public:
/// Return true if error occurred.
bool initializeMachineFunction(MachineFunction &MF);
- /// Initialize the machine basic block using it's YAML representation.
- ///
- /// Return true if an error occurred.
- bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
- const yaml::MachineBasicBlock &YamlMBB,
- const PerFunctionMIParsingState &PFS);
+ bool initializeRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS);
+
+ void inferRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeFrameInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS);
+
+ bool parseCalleeSavedRegister(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource,
+ int FrameIdx);
+
+ bool parseStackObjectsDebugInfo(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object,
+ int FrameIdx);
- bool
- initializeRegisterInfo(const MachineFunction &MF,
- MachineRegisterInfo &RegInfo,
- const yaml::MachineFunction &YamlMF,
- DenseMap<unsigned, unsigned> &VirtualRegisterSlots);
+ bool initializeConstantPool(MachineConstantPool &ConstantPool,
+ const yaml::MachineFunction &YamlMF,
+ const MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots);
- bool initializeFrameInfo(MachineFrameInfo &MFI,
- const yaml::MachineFunction &YamlMF);
+ bool initializeJumpTableInfo(MachineFunction &MF,
+ const yaml::MachineJumpTable &YamlJTI,
+ PerFunctionMIParsingState &PFS);
private:
+ bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+ MachineFunction &MF, const PerFunctionMIParsingState &PFS);
+
+ bool parseMBBReference(MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source, MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS);
+
/// Return a MIR diagnostic converted from an MI string diagnostic.
SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
- /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
- SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
- SMRange SourceRange);
+ /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+ /// block scalar string.
+ SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
/// Create an empty function with the given name.
void createDummyFunction(StringRef Name, Module &M);
@@ -200,7 +225,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
Context, &IRSlots);
if (!M) {
- reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
+ reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
return M;
}
In.nextDocument();
@@ -261,88 +286,56 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasInlineAsm(YamlMF.HasInlineAsm);
PerFunctionMIParsingState PFS;
- if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF,
- PFS.VirtualRegisterSlots))
- return true;
- if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF))
+ if (initializeRegisterInfo(MF, YamlMF, PFS))
return true;
-
- const auto &F = *MF.getFunction();
- for (const auto &YamlMBB : YamlMF.BasicBlocks) {
- const BasicBlock *BB = nullptr;
- const yaml::StringValue &Name = YamlMBB.Name;
- if (!Name.Value.empty()) {
- BB = dyn_cast_or_null<BasicBlock>(
- F.getValueSymbolTable().lookup(Name.Value));
- if (!BB)
- return error(Name.SourceRange.Start,
- Twine("basic block '") + Name.Value +
- "' is not defined in the function '" + MF.getName() +
- "'");
- }
- auto *MBB = MF.CreateMachineBasicBlock(BB);
- MF.insert(MF.end(), MBB);
- bool WasInserted =
- PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
- if (!WasInserted)
- return error(Twine("redefinition of machine basic block with id #") +
- Twine(YamlMBB.ID));
- }
-
- if (YamlMF.BasicBlocks.empty())
- return error(Twine("machine function '") + Twine(MF.getName()) +
- "' requires at least one machine basic block in its body");
- // Initialize the machine basic blocks after creating them all so that the
- // machine instructions parser can resolve the MBB references.
- unsigned I = 0;
- for (const auto &YamlMBB : YamlMF.BasicBlocks) {
- if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
- PFS))
+ if (!YamlMF.Constants.empty()) {
+ auto *ConstantPool = MF.getConstantPool();
+ assert(ConstantPool && "Constant pool must be created");
+ if (initializeConstantPool(*ConstantPool, YamlMF, MF,
+ PFS.ConstantPoolSlots))
return true;
}
- return false;
-}
-bool MIRParserImpl::initializeMachineBasicBlock(
- MachineFunction &MF, MachineBasicBlock &MBB,
- const yaml::MachineBasicBlock &YamlMBB,
- const PerFunctionMIParsingState &PFS) {
- MBB.setAlignment(YamlMBB.Alignment);
- if (YamlMBB.AddressTaken)
- MBB.setHasAddressTaken();
- MBB.setIsLandingPad(YamlMBB.IsLandingPad);
SMDiagnostic Error;
- // Parse the successors.
- for (const auto &MBBSource : YamlMBB.Successors) {
- MachineBasicBlock *SuccMBB = nullptr;
- if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots,
- Error))
- return error(Error, MBBSource.SourceRange);
- // TODO: Report an error when adding the same successor more than once.
- MBB.addSuccessor(SuccMBB);
- }
- // Parse the liveins.
- for (const auto &LiveInSource : YamlMBB.LiveIns) {
- unsigned Reg = 0;
- if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS,
- IRSlots, Error))
- return error(Error, LiveInSource.SourceRange);
- MBB.addLiveIn(Reg);
+ if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS,
+ IRSlots, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
}
- // Parse the instructions.
- for (const auto &MISource : YamlMBB.Instructions) {
- MachineInstr *MI = nullptr;
- if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error))
- return error(Error, MISource.SourceRange);
- MBB.insert(MBB.end(), MI);
+
+ if (MF.empty())
+ return error(Twine("machine function '") + Twine(MF.getName()) +
+ "' requires at least one machine basic block in its body");
+ // Initialize the frame information after creating all the MBBs so that the
+ // MBB references in the frame information can be resolved.
+ if (initializeFrameInfo(MF, YamlMF, PFS))
+ return true;
+ // Initialize the jump table after creating all the MBBs so that the MBB
+ // references can be resolved.
+ if (!YamlMF.JumpTableInfo.Entries.empty() &&
+ initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS))
+ return true;
+ // Parse the machine instructions after creating all of the MBBs so that the
+ // parser can resolve the MBB references.
+ if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots,
+ Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
}
+ inferRegisterInfo(MF, YamlMF);
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MF.getRegInfo().freezeReservedRegs(MF);
+ MF.verify();
return false;
}
-bool MIRParserImpl::initializeRegisterInfo(
- const MachineFunction &MF, MachineRegisterInfo &RegInfo,
- const yaml::MachineFunction &YamlMF,
- DenseMap<unsigned, unsigned> &VirtualRegisterSlots) {
+bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
assert(RegInfo.isSSA());
if (!YamlMF.IsSSA)
RegInfo.leaveSSA();
@@ -351,6 +344,7 @@ bool MIRParserImpl::initializeRegisterInfo(
RegInfo.invalidateLiveness();
RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
+ SMDiagnostic Error;
// Parse the virtual register information.
for (const auto &VReg : YamlMF.VirtualRegisters) {
const auto *RC = getRegClass(MF, VReg.Class.Value);
@@ -359,15 +353,71 @@ bool MIRParserImpl::initializeRegisterInfo(
Twine("use of undefined register class '") +
VReg.Class.Value + "'");
unsigned Reg = RegInfo.createVirtualRegister(RC);
- // TODO: Report an error when the same virtual register with the same ID is
- // redefined.
- VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg));
+ if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
+ .second)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ if (!VReg.PreferredRegister.Value.empty()) {
+ unsigned PreferredReg = 0;
+ if (parseNamedRegisterReference(PreferredReg, SM, MF,
+ VReg.PreferredRegister.Value, PFS,
+ IRSlots, Error))
+ return error(Error, VReg.PreferredRegister.SourceRange);
+ RegInfo.setSimpleHint(Reg, PreferredReg);
+ }
}
+
+ // Parse the liveins.
+ for (const auto &LiveIn : YamlMF.LiveIns) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS,
+ IRSlots, Error))
+ return error(Error, LiveIn.Register.SourceRange);
+ unsigned VReg = 0;
+ if (!LiveIn.VirtualRegister.Value.empty()) {
+ if (parseVirtualRegisterReference(
+ VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error))
+ return error(Error, LiveIn.VirtualRegister.SourceRange);
+ }
+ RegInfo.addLiveIn(Reg, VReg);
+ }
+
+ // Parse the callee saved register mask.
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
+ if (!YamlMF.CalleeSavedRegisters)
+ return false;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots,
+ Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisterMask[Reg] = true;
+ }
+ RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
return false;
}
-bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
- const yaml::MachineFunction &YamlMF) {
+void MIRParserImpl::inferRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF) {
+ if (YamlMF.CalleeSavedRegisters)
+ return;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
+ }
+ }
+}
+
+bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS) {
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const Function &F = *MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
@@ -383,7 +433,20 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ if (!YamlMFI.SavePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS))
+ return true;
+ MFI.setSavePoint(MBB);
+ }
+ if (!YamlMFI.RestorePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS))
+ return true;
+ MFI.setRestorePoint(MBB);
+ }
+ std::vector<CalleeSavedInfo> CSIInfo;
// Initialize the fixed frame objects.
for (const auto &Object : YamlMF.FixedStackObjects) {
int ObjectIdx;
@@ -393,27 +456,190 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
else
ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
- // TODO: Store the mapping between fixed object IDs and object indices to
- // parse fixed stack object references correctly.
+ if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
+ ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of fixed stack object '%fixed-stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
}
// Initialize the ordinary frame objects.
for (const auto &Object : YamlMF.StackObjects) {
int ObjectIdx;
+ const AllocaInst *Alloca = nullptr;
+ const yaml::StringValue &Name = Object.Name;
+ if (!Name.Value.empty()) {
+ Alloca = dyn_cast_or_null<AllocaInst>(
+ F.getValueSymbolTable().lookup(Name.Value));
+ if (!Alloca)
+ return error(Name.SourceRange.Start,
+ "alloca instruction named '" + Name.Value +
+ "' isn't defined in the function '" + F.getName() +
+ "'");
+ }
if (Object.Type == yaml::MachineStackObject::VariableSized)
- ObjectIdx =
- MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr);
+ ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
else
ObjectIdx = MFI.CreateStackObject(
Object.Size, Object.Alignment,
- Object.Type == yaml::MachineStackObject::SpillSlot);
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
- // TODO: Store the mapping between object IDs and object indices to parse
- // stack object references correctly.
+ if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of stack object '%stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
+ if (Object.LocalOffset)
+ MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+ if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx))
+ return true;
+ }
+ MFI.setCalleeSavedInfo(CSIInfo);
+ if (!CSIInfo.empty())
+ MFI.setCalleeSavedInfoValid(true);
+
+ // Initialize the various stack object references after initializing the
+ // stack objects.
+ if (!YamlMFI.StackProtector.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS,
+ IRSlots, Error))
+ return error(Error, YamlMFI.StackProtector.SourceRange);
+ MFI.setStackProtectorIndex(FI);
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseCalleeSavedRegister(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource, int FrameIdx) {
+ if (RegisterSource.Value.empty())
+ return false;
+ unsigned Reg = 0;
+ SMDiagnostic Error;
+ if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS,
+ IRSlots, Error))
+ return error(Error, RegisterSource.SourceRange);
+ CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
+ return false;
+}
+
+/// Verify that the given node is of a certain type. Return true on error.
+template <typename T>
+static bool typecheckMDNode(T *&Result, MDNode *Node,
+ const yaml::StringValue &Source,
+ StringRef TypeString, MIRParserImpl &Parser) {
+ if (!Node)
+ return false;
+ Result = dyn_cast<T>(Node);
+ if (!Result)
+ return Parser.error(Source.SourceRange.Start,
+ "expected a reference to a '" + TypeString +
+ "' metadata node");
+ return false;
+}
+
+bool MIRParserImpl::parseStackObjectsDebugInfo(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object, int FrameIdx) {
+ // Debug information can only be attached to stack objects; fixed stack
+ // objects aren't supported.
+ assert(FrameIdx >= 0 && "Expected a stack object frame index");
+ MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
+ if (parseMDNode(Var, Object.DebugVar, MF, PFS) ||
+ parseMDNode(Expr, Object.DebugExpr, MF, PFS) ||
+ parseMDNode(Loc, Object.DebugLoc, MF, PFS))
+ return true;
+ if (!Var && !Expr && !Loc)
+ return false;
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
+ return true;
+ MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ return false;
+}
+
+bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+ MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS) {
+ if (Source.Value.empty())
+ return false;
+ SMDiagnostic Error;
+ if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::initializeConstantPool(
+ MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF,
+ const MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots) {
+ const auto &M = *MF.getFunction()->getParent();
+ SMDiagnostic Error;
+ for (const auto &YamlConstant : YamlMF.Constants) {
+ const Constant *Value = dyn_cast_or_null<Constant>(
+ parseConstantValue(YamlConstant.Value.Value, Error, M));
+ if (!Value)
+ return error(Error, YamlConstant.Value.SourceRange);
+ unsigned Alignment =
+ YamlConstant.Alignment
+ ? YamlConstant.Alignment
+ : M.getDataLayout().getPrefTypeAlignment(Value->getType());
+ unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
+ if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
+ .second)
+ return error(YamlConstant.ID.SourceRange.Start,
+ Twine("redefinition of constant pool item '%const.") +
+ Twine(YamlConstant.ID.Value) + "'");
}
return false;
}
+bool MIRParserImpl::initializeJumpTableInfo(
+ MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI,
+ PerFunctionMIParsingState &PFS) {
+ MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
+ for (const auto &Entry : YamlJTI.Entries) {
+ std::vector<MachineBasicBlock *> Blocks;
+ for (const auto &MBBSource : Entry.Blocks) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, MBBSource.Value, MF, PFS))
+ return true;
+ Blocks.push_back(MBB);
+ }
+ unsigned Index = JTI->createJumpTableIndex(Blocks);
+ if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index))
+ .second)
+ return error(Entry.ID.SourceRange.Start,
+ Twine("redefinition of jump table entry '%jump-table.") +
+ Twine(Entry.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source,
+ MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS) {
+ SMDiagnostic Error;
+ if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange) {
assert(SourceRange.isValid() && "Invalid source range");
@@ -430,8 +656,8 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
-SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
- SMRange SourceRange) {
+SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
assert(SourceRange.isValid());
// Translate the location of the error from the location in the llvm IR string
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index d5cf924..175cb0d 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -14,13 +14,20 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
@@ -31,11 +38,38 @@ using namespace llvm;
namespace {
+/// This structure describes how to print out stack object references.
+struct FrameIndexOperand {
+ std::string Name;
+ unsigned ID;
+ bool IsFixed;
+
+ FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed)
+ : Name(Name.str()), ID(ID), IsFixed(IsFixed) {}
+
+ /// Return an ordinary stack object reference.
+ static FrameIndexOperand create(StringRef Name, unsigned ID) {
+ return FrameIndexOperand(Name, ID, /*IsFixed=*/false);
+ }
+
+ /// Return a fixed stack object reference.
+ static FrameIndexOperand createFixed(unsigned ID) {
+ return FrameIndexOperand("", ID, /*IsFixed=*/true);
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
/// This class prints out the machine functions using the MIR serialization
/// format.
class MIRPrinter {
raw_ostream &OS;
DenseMap<const uint32_t *, unsigned> RegisterMaskIds;
+ /// Maps from stack object indices to operand indices which will be used when
+ /// printing frame index machine operands.
+ DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
public:
MIRPrinter(raw_ostream &OS) : OS(OS) {}
@@ -44,11 +78,16 @@ public:
void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI);
- void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI);
- void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB,
- const MachineBasicBlock &MBB);
+ void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI);
+ void convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool);
+ void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI);
+ const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -60,18 +99,32 @@ class MIPrinter {
raw_ostream &OS;
ModuleSlotTracker &MST;
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
public:
MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
- const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
- : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {}
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+ : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+ StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+ void print(const MachineBasicBlock &MBB);
void print(const MachineInstr &MI);
void printMBBReference(const MachineBasicBlock &MBB);
- void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
+ void printIRBlockReference(const BasicBlock &BB);
+ void printIRValueReference(const Value &V);
+ void printStackObjectReference(int FrameIndex);
+ void printOffset(int64_t Offset);
+ void printTargetFlags(const MachineOperand &Op);
+ void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false);
+ void print(const MachineMemOperand &Op);
+
+ void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
};
-} // end anonymous namespace
+} // end namespace llvm
namespace llvm {
namespace yaml {
@@ -103,6 +156,12 @@ static void printReg(unsigned Reg, raw_ostream &OS,
llvm_unreachable("Can't print this kind of register yet");
}
+static void printReg(unsigned Reg, yaml::StringValue &Dest,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ printReg(Reg, OS, TRI);
+}
+
void MIRPrinter::print(const MachineFunction &MF) {
initRegisterMaskIds(MF);
@@ -112,23 +171,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasInlineAsm = MF.hasInlineAsm();
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
- convert(YamlMF.FrameInfo, *MF.getFrameInfo());
- convertStackObjects(YamlMF, *MF.getFrameInfo());
-
- int I = 0;
ModuleSlotTracker MST(MF.getFunction()->getParent());
+ MST.incorporateFunction(*MF.getFunction());
+ convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
+ convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
+ MF.getSubtarget().getRegisterInfo());
+ if (const auto *ConstantPool = MF.getConstantPool())
+ convert(YamlMF, *ConstantPool);
+ if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+ convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+ raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+ bool IsNewlineNeeded = false;
for (const auto &MBB : MF) {
- // TODO: Allow printing of non sequentially numbered MBBs.
- // This is currently needed as the basic block references get their index
- // from MBB.getNumber(), thus it should be sequential so that the parser can
- // map back to the correct MBBs when parsing the output.
- assert(MBB.getNumber() == I++ &&
- "Can't print MBBs that aren't sequentially numbered");
- (void)I;
- yaml::MachineBasicBlock YamlMBB;
- convert(MST, YamlMBB, MBB);
- YamlMF.BasicBlocks.push_back(YamlMBB);
+ if (IsNewlineNeeded)
+ StrOS << "\n";
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .print(MBB);
+ IsNewlineNeeded = true;
}
+ StrOS.flush();
yaml::Output Out(OS);
Out << YamlMF;
}
@@ -147,11 +208,38 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
VReg.ID = I;
VReg.Class =
StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+ if (PreferredReg)
+ printReg(PreferredReg, VReg.PreferredRegister, TRI);
MF.VirtualRegisters.push_back(VReg);
}
+
+ // Print the live ins.
+ for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+ yaml::MachineFunctionLiveIn LiveIn;
+ printReg(I->first, LiveIn.Register, TRI);
+ if (I->second)
+ printReg(I->second, LiveIn.VirtualRegister, TRI);
+ MF.LiveIns.push_back(LiveIn);
+ }
+ // The used physical register mask is printed as an inverted callee saved
+ // register mask.
+ const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
+ if (UsedPhysRegMask.none())
+ return;
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
+ if (!UsedPhysRegMask[I]) {
+ yaml::FlowStringValue Reg;
+ printReg(I, Reg, TRI);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ }
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
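As a rough, standalone illustration of the inverted-mask idea in the loop above (a std::vector<bool> stand-in for the used-physreg BitVector, not the actual LLVM types):

#include <vector>

// Every register whose bit is NOT set in the used-physreg mask ends up in the
// printed callee-saved list; setting a bit removes a register from that list.
static std::vector<unsigned> invertUsedMask(const std::vector<bool> &UsedPhysRegs) {
  std::vector<unsigned> CalleeSaved;
  for (unsigned Reg = 0, E = unsigned(UsedPhysRegs.size()); Reg != E; ++Reg)
    if (!UsedPhysRegs[Reg])
      CalleeSaved.push_back(Reg);
  return CalleeSaved;
}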
-void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineFrameInfo &YamlMFI,
const MachineFrameInfo &MFI) {
YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
@@ -166,10 +254,23 @@ void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ if (MFI.getSavePoint()) {
+ raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getSavePoint());
+ }
+ if (MFI.getRestorePoint()) {
+ raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getRestorePoint());
+ }
}
void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI) {
+ const MachineFrameInfo &MFI,
+ MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI) {
// Process fixed stack objects.
unsigned ID = 0;
for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -177,7 +278,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
continue;
yaml::FixedMachineStackObject YamlObject;
- YamlObject.ID = ID++;
+ YamlObject.ID = ID;
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::FixedMachineStackObject::SpillSlot
: yaml::FixedMachineStackObject::DefaultType;
@@ -187,8 +288,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
MF.FixedStackObjects.push_back(YamlObject);
- // TODO: Store the mapping between fixed object IDs and object indices to
- // print the fixed stack object references correctly.
+ StackObjectOperandMapping.insert(
+ std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
}
// Process ordinary stack objects.
@@ -198,7 +299,10 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
continue;
yaml::MachineStackObject YamlObject;
- YamlObject.ID = ID++;
+ YamlObject.ID = ID;
+ if (const auto *Alloca = MFI.getObjectAllocation(I))
+ YamlObject.Name.Value =
+ Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::MachineStackObject::SpillSlot
: MFI.isVariableSizedObjectIndex(I)
@@ -209,47 +313,100 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Alignment = MFI.getObjectAlignment(I);
MF.StackObjects.push_back(YamlObject);
- // TODO: Store the mapping between object IDs and object indices to print
- // the stack object references correctly.
+ StackObjectOperandMapping.insert(std::make_pair(
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+ }
+
+ for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+ yaml::StringValue Reg;
+ printReg(CSInfo.getReg(), Reg, TRI);
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed)
+ MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ else
+ MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ }
+ for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+ auto LocalObject = MFI.getLocalFrameObjectMap(I);
+ auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
+ MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ }
+
+ // Print the stack object references in the frame information class after
+ // converting the stack objects.
+ if (MFI.hasStackProtectorIndex()) {
+ raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getStackProtectorIndex());
+ }
+
+ // Print the debug variable information.
+ for (MachineModuleInfo::VariableDbgInfo &DebugVar :
+ MMI.getVariableDbgInfo()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
+ auto &Object = MF.StackObjects[StackObject.ID];
+ {
+ raw_string_ostream StrOS(Object.DebugVar.Value);
+ DebugVar.Var->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugExpr.Value);
+ DebugVar.Expr->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugLoc.Value);
+ DebugVar.Loc->printAsOperand(StrOS, MST);
+ }
}
}
-void MIRPrinter::convert(ModuleSlotTracker &MST,
- yaml::MachineBasicBlock &YamlMBB,
- const MachineBasicBlock &MBB) {
- assert(MBB.getNumber() >= 0 && "Invalid MBB number");
- YamlMBB.ID = (unsigned)MBB.getNumber();
- // TODO: Serialize unnamed BB references.
- if (const auto *BB = MBB.getBasicBlock())
- YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>";
- else
- YamlMBB.Name.Value = "";
- YamlMBB.Alignment = MBB.getAlignment();
- YamlMBB.AddressTaken = MBB.hasAddressTaken();
- YamlMBB.IsLandingPad = MBB.isLandingPad();
- for (const auto *SuccMBB : MBB.successors()) {
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool) {
+ unsigned ID = 0;
+ for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+ // TODO: Serialize target specific constant pool entries.
+ if (Constant.isMachineConstantPoolEntry())
+ llvm_unreachable("Can't print target specific constant pool entries yet");
+
+ yaml::MachineConstantPoolValue YamlConstant;
std::string Str;
raw_string_ostream StrOS(Str);
- MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB);
- YamlMBB.Successors.push_back(StrOS.str());
+ Constant.Val.ConstVal->printAsOperand(StrOS);
+ YamlConstant.ID = ID++;
+ YamlConstant.Value = StrOS.str();
+ YamlConstant.Alignment = Constant.getAlignment();
+ MF.Constants.push_back(YamlConstant);
}
- // Print the live in registers.
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) {
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI) {
+ YamlJTI.Kind = JTI.getEntryKind();
+ unsigned ID = 0;
+ for (const auto &Table : JTI.getJumpTables()) {
std::string Str;
- raw_string_ostream StrOS(Str);
- printReg(*I, StrOS, TRI);
- YamlMBB.LiveIns.push_back(StrOS.str());
- }
- // Print the machine instructions.
- YamlMBB.Instructions.reserve(MBB.size());
- std::string Str;
- for (const auto &MI : MBB) {
- raw_string_ostream StrOS(Str);
- MIPrinter(StrOS, MST, RegisterMaskIds).print(MI);
- YamlMBB.Instructions.push_back(StrOS.str());
- Str.clear();
+ yaml::MachineJumpTable::Entry Entry;
+ Entry.ID = ID++;
+ for (const auto *MBB : Table.MBBs) {
+ raw_string_ostream StrOS(Str);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MBB);
+ Entry.Blocks.push_back(StrOS.str());
+ Str.clear();
+ }
+ YamlJTI.Entries.push_back(Entry);
}
}
@@ -260,26 +417,137 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
RegisterMaskIds.insert(std::make_pair(Mask, I++));
}
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+ assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+ OS << "bb." << MBB.getNumber();
+ bool HasAttributes = false;
+ if (const auto *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ OS << "." << BB->getName();
+ } else {
+ HasAttributes = true;
+ OS << " (";
+ int Slot = MST.getLocalSlot(BB);
+ if (Slot == -1)
+ OS << "<ir-block badref>";
+ else
+ OS << (Twine("%ir-block.") + Twine(Slot)).str();
+ }
+ }
+ if (MBB.hasAddressTaken()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "address-taken";
+ HasAttributes = true;
+ }
+ if (MBB.isEHPad()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "landing-pad";
+ HasAttributes = true;
+ }
+ if (MBB.getAlignment()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "align " << MBB.getAlignment();
+ HasAttributes = true;
+ }
+ if (HasAttributes)
+ OS << ")";
+ OS << ":\n";
+
+ bool HasLineAttributes = false;
+ // Print the successors
+ if (!MBB.succ_empty()) {
+ OS.indent(2) << "successors: ";
+ for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+ if (I != MBB.succ_begin())
+ OS << ", ";
+ printMBBReference(**I);
+ if (MBB.hasSuccessorProbabilities())
+ OS << '(' << MBB.getSuccProbability(I) << ')';
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ // Print the live in registers.
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ if (!MBB.livein_empty()) {
+ OS.indent(2) << "liveins: ";
+ bool First = true;
+ for (const auto &LI : MBB.liveins()) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ printReg(LI.PhysReg, OS, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << "\n";
+ bool IsInBundle = false;
+ for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+ OS.indent(IsInBundle ? 4 : 2);
+ print(MI);
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
+ }
+ OS << "\n";
+ }
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+}
+
+/// Return true when an instruction has a tied register that can't be determined
+/// by the instruction's descriptor.
+static bool hasComplexRegisterTies(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const auto &Operand = MI.getOperand(I);
+ if (!Operand.isReg() || Operand.isDef())
+ // Ignore the defined registers as MCID marks only the uses as tied.
+ continue;
+ int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+ int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
+ if (ExpectedTiedIdx != TiedIdx)
+ return true;
+ }
+ return false;
+}
+
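A minimal sketch of the same tie check with hypothetical stand-in types (Operand and Desc below are toy structs, not MachineOperand or MCInstrDesc):

#include <cstddef>
#include <vector>

struct Operand {
  bool IsReg = false;
  bool IsDef = false;
  int TiedIdx = -1;          // -1 means "not tied"
};

struct Desc {
  std::vector<int> TiedTo;   // per-operand TIED_TO constraint, -1 if none
};

// A tie is "complex" when a use operand's actual tied index differs from what
// the descriptor-style constraint table predicts, so it must be printed.
static bool hasComplexTies(const Desc &D, const std::vector<Operand> &Ops) {
  for (std::size_t I = 0; I < Ops.size(); ++I) {
    const Operand &Op = Ops[I];
    if (!Op.IsReg || Op.IsDef)
      continue;                                        // only uses carry the constraint
    int Expected = I < D.TiedTo.size() ? D.TiedTo[I] : -1;
    if (Expected != Op.TiedIdx)
      return true;
  }
  return false;
}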
void MIPrinter::print(const MachineInstr &MI) {
const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
const auto *TRI = SubTarget.getRegisterInfo();
assert(TRI && "Expected target register info");
const auto *TII = SubTarget.getInstrInfo();
assert(TII && "Expected target instruction info");
+ if (MI.isCFIInstruction())
+ assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+ bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
unsigned I = 0, E = MI.getNumOperands();
for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
!MI.getOperand(I).isImplicit();
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true);
}
if (I)
OS << " = ";
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
OS << TII->getName(MI.getOpcode());
- // TODO: Print the instruction flags, machine mem operands.
if (I < E)
OS << ' ';
@@ -287,9 +555,27 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI.getOperand(I), TRI);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
NeedComma = true;
}
+
+ if (MI.getDebugLoc()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-location ";
+ MI.getDebugLoc()->printAsOperand(OS, MST);
+ }
+
+ if (!MI.memoperands_empty()) {
+ OS << " :: ";
+ bool NeedComma = false;
+ for (const auto *Op : MI.memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ print(*Op);
+ NeedComma = true;
+ }
+ }
}
void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
@@ -300,32 +586,225 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
}
}
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+static void printIRSlotNumber(raw_ostream &OS, int Slot) {
+ if (Slot == -1)
+ OS << "<badref>";
+ else
+ OS << Slot;
+}
+
+void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
+ OS << "%ir-block.";
+ if (BB.hasName()) {
+ printLLVMNameWithoutPrefix(OS, BB.getName());
+ return;
+ }
+ const Function *F = BB.getParent();
+ int Slot;
+ if (F == MST.getCurrentFunction()) {
+ Slot = MST.getLocalSlot(&BB);
+ } else {
+ ModuleSlotTracker CustomMST(F->getParent(),
+ /*ShouldInitializeAllMetadata=*/false);
+ CustomMST.incorporateFunction(*F);
+ Slot = CustomMST.getLocalSlot(&BB);
+ }
+ printIRSlotNumber(OS, Slot);
+}
+
+void MIPrinter::printIRValueReference(const Value &V) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+ auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+ assert(ObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid frame index");
+ const FrameIndexOperand &Operand = ObjectInfo->second;
+ if (Operand.IsFixed) {
+ OS << "%fixed-stack." << Operand.ID;
+ return;
+ }
+ OS << "%stack." << Operand.ID;
+ if (!Operand.Name.empty())
+ OS << '.' << Operand.Name;
+}
+
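A rough sketch of how the frame-index mapping built in convertStackObjects feeds this helper, using a plain std::map and printf instead of the DenseMap and raw_ostream machinery (toy types, for illustration only):

#include <cstdio>
#include <map>
#include <string>

struct FrameIndexOp {
  bool IsFixed = false;
  unsigned ID = 0;
  std::string Name;
};

// Prints "%fixed-stack.<id>" for fixed objects, otherwise "%stack.<id>" with
// an optional ".<name>" suffix, mirroring the helper above.
static void printStackRef(const std::map<int, FrameIndexOp> &Mapping, int FI) {
  const FrameIndexOp &Op = Mapping.at(FI);   // throws if FI was never mapped
  if (Op.IsFixed) {
    std::printf("%%fixed-stack.%u", Op.ID);
    return;
  }
  std::printf("%%stack.%u", Op.ID);
  if (!Op.Name.empty())
    std::printf(".%s", Op.Name.c_str());
}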
+void MIPrinter::printOffset(int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TF) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::printTargetFlags(const MachineOperand &Op) {
+ if (!Op.getTargetFlags())
+ return;
+ const auto *TII =
+ Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+ OS << "target-flags(";
+ const bool HasDirectFlags = Flags.first;
+ const bool HasBitmaskFlags = Flags.second;
+ if (!HasDirectFlags && !HasBitmaskFlags) {
+ OS << "<unknown>) ";
+ return;
+ }
+ if (HasDirectFlags) {
+ if (const auto *Name = getTargetFlagName(TII, Flags.first))
+ OS << Name;
+ else
+ OS << "<unknown target flag>";
+ }
+ if (!HasBitmaskFlags) {
+ OS << ") ";
+ return;
+ }
+ bool IsCommaNeeded = HasDirectFlags;
+ unsigned BitMask = Flags.second;
+ auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &Mask : BitMasks) {
+ // Check if the flag's bitmask has the bits of the current mask set.
+ if ((BitMask & Mask.first) == Mask.first) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ IsCommaNeeded = true;
+ OS << Mask.second;
+ // Clear the bits which were serialized from the flag's bitmask.
+ BitMask &= ~(Mask.first);
+ }
+ }
+ if (BitMask) {
+ // When the resulting flag's bitmask isn't zero, we know that we didn't
+ // serialize all of the bit flags.
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << "<unknown bitmask target flag>";
+ }
+ OS << ") ";
+}
+
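The bitmask branch above follows a common decompose-and-report pattern; a self-contained sketch with a made-up flag table (not any real target's flags):

#include <cstdio>
#include <utility>
#include <vector>

// Names every table entry whose bits are fully set in BitMask, clears those
// bits, and reports any leftover bits that had no name.
static void printBitmaskFlags(unsigned BitMask,
                              const std::vector<std::pair<unsigned, const char *>> &Table) {
  bool NeedComma = false;
  for (const auto &Entry : Table) {
    if (Entry.first != 0 && (BitMask & Entry.first) == Entry.first) {
      std::printf("%s%s", NeedComma ? ", " : "", Entry.second);
      NeedComma = true;
      BitMask &= ~Entry.first;
    }
  }
  if (BitMask)
    std::printf("%s<unknown bitmask target flag>", NeedComma ? ", " : "");
}

int main() {
  printBitmaskFlags(0x5, {{0x1, "flag-a"}, {0x2, "flag-b"}, {0x8, "flag-c"}});
  // prints: flag-a, <unknown bitmask target flag>   (bit 0x4 has no name)
}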
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices) {
+ if (I.first == Index) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies, bool IsDef) {
+ printTargetFlags(Op);
switch (Op.getType()) {
case MachineOperand::MO_Register:
- // TODO: Print the other register flags.
if (Op.isImplicit())
OS << (Op.isDef() ? "implicit-def " : "implicit ");
+ else if (!IsDef && Op.isDef())
+ // Print the 'def' flag only when the operand is defined after '='.
+ OS << "def ";
+ if (Op.isInternalRead())
+ OS << "internal ";
if (Op.isDead())
OS << "dead ";
if (Op.isKill())
OS << "killed ";
if (Op.isUndef())
OS << "undef ";
+ if (Op.isEarlyClobber())
+ OS << "early-clobber ";
+ if (Op.isDebug())
+ OS << "debug-use ";
printReg(Op.getReg(), OS, TRI);
// Print the sub register.
if (Op.getSubReg() != 0)
OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+ if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
+ OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
break;
+ case MachineOperand::MO_CImmediate:
+ Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
case MachineOperand::MO_MachineBasicBlock:
printMBBReference(*Op.getMBB());
break;
+ case MachineOperand::MO_FrameIndex:
+ printStackObjectReference(Op.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "%const." << Op.getIndex();
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_TargetIndex: {
+ OS << "target-index(";
+ if (const auto *Name = getTargetIndexName(
+ *Op.getParent()->getParent()->getParent(), Op.getIndex()))
+ OS << Name;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ printOffset(Op.getOffset());
+ break;
+ }
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "%jump-table." << Op.getIndex();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << '$';
+ printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+ printOffset(Op.getOffset());
+ break;
case MachineOperand::MO_GlobalAddress:
Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
- // TODO: Print offset and target flags.
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << "blockaddress(";
+ Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+ MST);
+ OS << ", ";
+ printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
+ OS << ')';
+ printOffset(Op.getOffset());
break;
case MachineOperand::MO_RegisterMask: {
auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
@@ -335,9 +814,157 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
llvm_unreachable("Can't print this machine register mask yet.");
break;
}
+ case MachineOperand::MO_RegisterLiveOut: {
+ const uint32_t *RegMask = Op.getRegLiveOut();
+ OS << "liveout(";
+ bool IsCommaNeeded = false;
+ for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+ if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ printReg(Reg, OS, TRI);
+ IsCommaNeeded = true;
+ }
+ }
+ OS << ")";
+ break;
+ }
+ case MachineOperand::MO_Metadata:
+ Op.getMetadata()->printAsOperand(OS, MST);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
+ break;
+ case MachineOperand::MO_CFIIndex: {
+ const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
+ print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ break;
+ }
+ }
+}
+
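For the liveout case above, the register mask is a packed bit vector; a small sketch of the bit addressing it uses:

#include <cstdint>

// Register R lives in 32-bit word R/32, bit R%32 of the mask.
static bool isRegInMask(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] & (1u << (Reg % 32))) != 0;
}

int main() {
  uint32_t Mask[2] = {0x5u, 0x1u};          // registers 0, 2 and 32 are set
  return isRegInMask(Mask, 32) ? 0 : 1;     // reg 32 -> word 1, bit 0
}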
+void MIPrinter::print(const MachineMemOperand &Op) {
+ OS << '(';
+ // TODO: Print operand's target specific flags.
+ if (Op.isVolatile())
+ OS << "volatile ";
+ if (Op.isNonTemporal())
+ OS << "non-temporal ";
+ if (Op.isInvariant())
+ OS << "invariant ";
+ if (Op.isLoad())
+ OS << "load ";
+ else {
+ assert(Op.isStore() && "Non-load machine operand must be a store");
+ OS << "store ";
+ }
+ OS << Op.getSize() << (Op.isLoad() ? " from " : " into ");
+ if (const Value *Val = Op.getValue()) {
+ printIRValueReference(*Val);
+ } else {
+ const PseudoSourceValue *PVal = Op.getPseudoValue();
+ assert(PVal && "Expected a pseudo source value");
+ switch (PVal->kind()) {
+ case PseudoSourceValue::Stack:
+ OS << "stack";
+ break;
+ case PseudoSourceValue::GOT:
+ OS << "got";
+ break;
+ case PseudoSourceValue::JumpTable:
+ OS << "jump-table";
+ break;
+ case PseudoSourceValue::ConstantPool:
+ OS << "constant-pool";
+ break;
+ case PseudoSourceValue::FixedStack:
+ printStackObjectReference(
+ cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
+ break;
+ case PseudoSourceValue::GlobalValueCallEntry:
+ OS << "call-entry ";
+ cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+ OS, /*PrintType=*/false, MST);
+ break;
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ OS << "call-entry $";
+ printLLVMNameWithoutPrefix(
+ OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+ break;
+ }
+ }
+ printOffset(Op.getOffset());
+ if (Op.getBaseAlignment() != Op.getSize())
+ OS << ", align " << Op.getBaseAlignment();
+ auto AAInfo = Op.getAAInfo();
+ if (AAInfo.TBAA) {
+ OS << ", !tbaa ";
+ AAInfo.TBAA->printAsOperand(OS, MST);
+ }
+ if (AAInfo.Scope) {
+ OS << ", !alias.scope ";
+ AAInfo.Scope->printAsOperand(OS, MST);
+ }
+ if (AAInfo.NoAlias) {
+ OS << ", !noalias ";
+ AAInfo.NoAlias->printAsOperand(OS, MST);
+ }
+ if (Op.getRanges()) {
+ OS << ", !range ";
+ Op.getRanges()->printAsOperand(OS, MST);
+ }
+ OS << ')';
+}
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+ if (Reg == -1) {
+ OS << "<badreg>";
+ return;
+ }
+ printReg(Reg, OS, TRI);
+}
+
+void MIPrinter::print(const MCCFIInstruction &CFI,
+ const TargetRegisterInfo *TRI) {
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpSameValue:
+ OS << ".cfi_same_value ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OS << ".cfi_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OS << ".cfi_def_cfa_register ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ OS << ".cfi_def_cfa_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OS << ".cfi_def_cfa ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
default:
- // TODO: Print the other machine operands.
- llvm_unreachable("Can't print this machine operand at the moment");
+ // TODO: Print the other CFI Operations.
+ OS << "<unserializable cfi operation>";
+ break;
}
}
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 13d61e6..8e7566a 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -40,7 +40,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override {
+ bool runOnMachineFunction(MachineFunction &MF) override {
std::string Str;
raw_string_ostream StrOS(Str);
printMIR(StrOS, MF);
@@ -48,7 +48,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
return false;
}
- virtual bool doFinalization(Module &M) override {
+ bool doFinalization(Module &M) override {
printMIR(OS, M);
OS << MachineFunctions;
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5d3f7eb..76099f2 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -38,22 +39,21 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
-MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
- : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
- AddressTaken(false), CachedMCSymbol(nullptr) {
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+ : BB(B), Number(-1), xParent(&MF) {
Insts.Parent = this;
}
MachineBasicBlock::~MachineBasicBlock() {
}
-/// getSymbol - Return the MCSymbol for this basic block.
-///
+/// Return the MCSymbol for this basic block.
MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -68,9 +68,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
return OS;
}
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
-/// parent pointer of the MBB, the MBB numbering, and any instructions in the
-/// MBB to be on the right operand list for registers.
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
+/// operand list for registers.
///
/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
/// gets the next available unique MBB number. If it is removed from a
@@ -91,10 +91,8 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
N->Number = -1;
}
-
-/// addNodeToList (MI) - When we add an instruction to a basic block
-/// list, we update its parent pointer and add its operands from reg use/def
-/// lists if appropriate.
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
assert(!N->getParent() && "machine instruction already in a basic block");
N->setParent(Parent);
@@ -105,9 +103,8 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
N->AddRegOperandsToUseLists(MF->getRegInfo());
}
-/// removeNodeFromList (MI) - When we remove an instruction from a basic block
-/// list, we update its parent pointer and remove its operands from reg use/def
-/// lists if appropriate.
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
@@ -118,23 +115,22 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
N->setParent(nullptr);
}
-/// transferNodesFromList (MI) - When moving a range of instructions from one
-/// MBB list to another, we need to update the parent pointers and the use/def
-/// lists.
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- ilist_iterator<MachineInstr> first,
- ilist_iterator<MachineInstr> last) {
- assert(Parent->getParent() == fromList.Parent->getParent() &&
+transferNodesFromList(ilist_traits<MachineInstr> &FromList,
+ ilist_iterator<MachineInstr> First,
+ ilist_iterator<MachineInstr> Last) {
+ assert(Parent->getParent() == FromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
// Splice within the same MBB -> no change.
- if (Parent == fromList.Parent) return;
+ if (Parent == FromList.Parent) return;
// If splicing between two blocks within the same function, just update the
// parent pointers.
- for (; first != last; ++first)
- first->setParent(Parent);
+ for (; First != Last; ++First)
+ First->setParent(Parent);
}
void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
@@ -208,11 +204,18 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
if (succ_size() > 2)
return nullptr;
for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isLandingPad())
+ if ((*I)->isEHPad())
return *I;
return nullptr;
}
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isEHPad())
+ return true;
+ return false;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineBasicBlock::dump() const {
print(dbgs());
@@ -271,7 +274,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
LBB->printAsOperand(OS, /*PrintType=*/false, MST);
Comma = ", ";
}
- if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
if (Alignment)
OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
@@ -283,8 +286,11 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
- for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
- OS << ' ' << PrintReg(*I, TRI);
+ for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ OS << ' ' << PrintReg(LI.PhysReg, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
OS << '\n';
}
// Print the preds of this block according to the CFG.
@@ -298,8 +304,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
- if (Indexes->hasIndex(I))
- OS << Indexes->getInstructionIndex(I);
+ if (Indexes->hasIndex(&*I))
+ OS << Indexes->getInstructionIndex(&*I);
OS << '\t';
}
OS << '\t';
@@ -314,35 +320,63 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
- if (!Weights.empty())
- OS << '(' << *getWeightIterator(SI) << ')';
+ if (!Probs.empty())
+ OS << '(' << *getProbabilityIterator(SI) << ')';
}
OS << '\n';
}
}
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+ bool /*PrintType*/) const {
OS << "BB#" << getNumber();
}
-void MachineBasicBlock::removeLiveIn(unsigned Reg) {
- std::vector<unsigned>::iterator I =
- std::find(LiveIns.begin(), LiveIns.end(), Reg);
- if (I != LiveIns.end())
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+ LiveInVector::iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ if (I == LiveIns.end())
+ return;
+
+ I->LaneMask &= ~LaneMask;
+ if (I->LaneMask == 0)
LiveIns.erase(I);
}
-bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
- livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
- return I != livein_end();
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+ livein_iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+ std::sort(LiveIns.begin(), LiveIns.end(),
+ [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+ return LI0.PhysReg < LI1.PhysReg;
+ });
+ // Liveins are sorted by physreg; now we can merge their lanemasks.
+ LiveInVector::const_iterator I = LiveIns.begin();
+ LiveInVector::const_iterator J;
+ LiveInVector::iterator Out = LiveIns.begin();
+ for (; I != LiveIns.end(); ++Out, I = J) {
+ unsigned PhysReg = I->PhysReg;
+ LaneBitmask LaneMask = I->LaneMask;
+ for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+ LaneMask |= J->LaneMask;
+ Out->PhysReg = PhysReg;
+ Out->LaneMask = LaneMask;
+ }
+ LiveIns.erase(Out, LiveIns.end());
}
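The merge above can be pictured with plain pairs; a minimal standalone sketch of the same sort-then-OR-then-compact scheme (toy RegMaskPair, not the real RegisterMaskPair type):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using RegMaskPair = std::pair<unsigned, uint32_t>;   // (PhysReg, LaneMask)

static void sortUnique(std::vector<RegMaskPair> &LiveIns) {
  std::sort(LiveIns.begin(), LiveIns.end(),
            [](const RegMaskPair &A, const RegMaskPair &B) {
              return A.first < B.first;
            });
  auto Out = LiveIns.begin();
  for (auto I = LiveIns.begin(); I != LiveIns.end(); ++Out) {
    unsigned Reg = I->first;
    uint32_t Mask = I->second;
    auto J = std::next(I);
    for (; J != LiveIns.end() && J->first == Reg; ++J)
      Mask |= J->second;                    // merge lane masks of duplicates
    *Out = {Reg, Mask};
    I = J;
  }
  LiveIns.erase(Out, LiveIns.end());        // drop the compacted-away tail
}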
unsigned
-MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
assert(RC && "Register class is required");
- assert((isLandingPad() || this == &getParent()->front()) &&
+ assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
bool LiveIn = isLiveIn(PhysReg);
@@ -370,12 +404,11 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
}
void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
- getParent()->splice(NewAfter, this);
+ getParent()->splice(NewAfter->getIterator(), getIterator());
}
void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
- MachineFunction::iterator BBI = NewBefore;
- getParent()->splice(++BBI, this);
+ getParent()->splice(++NewBefore->getIterator(), getIterator());
}
void MachineBasicBlock::updateTerminator() {
@@ -385,7 +418,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc DL; // FIXME: this is nowhere
bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -400,7 +433,7 @@ void MachineBasicBlock::updateTerminator() {
// its layout successor, insert a branch. First we have to locate the
// only non-landing-pad successor, as that is the fallthrough block.
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ if ((*SI)->isEHPad())
continue;
assert(!TBB && "Found more than one non-landing-pad successor!");
TBB = *SI;
@@ -414,7 +447,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
}
} else {
if (FBB) {
@@ -425,10 +458,10 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond))
return;
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
}
} else {
// Walk through the successors and find the successor which is not
@@ -436,7 +469,7 @@ void MachineBasicBlock::updateTerminator() {
// as the fallthrough successor.
MachineBasicBlock *FallthroughBB = nullptr;
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isLandingPad() || *SI == TBB)
+ if ((*SI)->isEHPad() || *SI == TBB)
continue;
assert(!FallthroughBB && "Found more than one fallthrough successor.");
FallthroughBB = *SI;
@@ -445,14 +478,14 @@ void MachineBasicBlock::updateTerminator() {
// We fallthrough to the same basic block as the conditional jump
// targets. Remove the conditional jump, leaving unconditional
// fallthrough.
- // FIXME: This does not seem like a reasonable pattern to support, but it
- // has been seen in the wild coming out of degenerate ARM test cases.
+ // FIXME: This does not seem like a reasonable pattern to support, but
+ // it has been seen in the wild coming out of degenerate ARM test cases.
TII->RemoveBranch(*this);
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
@@ -461,55 +494,69 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
} else if (!isLayoutSuccessor(FallthroughBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
}
}
}
}
-void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
-
- // If we see non-zero value for the first time it means we actually use Weight
- // list, so we fill all Weights with 0's.
- if (weight != 0 && Weights.empty())
- Weights.resize(Successors.size());
-
- if (weight != 0 || !Weights.empty())
- Weights.push_back(weight);
-
- Successors.push_back(succ);
- succ->addPredecessor(this);
- }
+void MachineBasicBlock::validateSuccProbs() const {
+#ifndef NDEBUG
+ int64_t Sum = 0;
+ for (auto Prob : Probs)
+ Sum += Prob.getNumerator();
+ // Due to precision issues, we assume that the sum of probabilities is one if
+ // the difference between the sum of their numerators and the denominator is
+ // no greater than the number of successors.
+ assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <=
+ Probs.size() &&
+ "The sum of successors's probabilities exceeds one.");
+#endif // NDEBUG
+}
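A tiny worked example of the tolerance in that assertion, using a hypothetical denominator (the real BranchProbability denominator is not assumed here):

#include <cassert>
#include <cstdint>

int main() {
  // Splitting 1'000'000 across three equal edges rounds each to 333'333, so
  // the numerators sum to 999'999; the slack of 1 is within the allowed bound
  // of "number of successors" (3), and the check passes.
  const int64_t Denominator = 1'000'000;
  const int64_t Numerators[] = {333'333, 333'333, 333'333};
  int64_t Sum = 0;
  for (int64_t N : Numerators)
    Sum += N;
  int64_t Slack = Denominator - Sum;
  assert(Slack >= -3 && Slack <= 3 && "probabilities drifted too far");
  return 0;
}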
-void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
- succ->removePredecessor(this);
- succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- assert(I != Successors.end() && "Not a current successor!");
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ,
+ BranchProbability Prob) {
+ // Probability list is either empty (if successor list isn't empty, this means
+ // disabled optimization) or has the same size as successor list.
+ if (!(Probs.empty() && !Successors.empty()))
+ Probs.push_back(Prob);
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(I);
- Weights.erase(WI);
- }
+void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
+ // We need to make sure probability list is either empty or has the same size
+ // as the successor list. When this function is called, we can safely delete all
+ // probability in the list.
+ Probs.clear();
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
- Successors.erase(I);
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
+ bool NormalizeSuccProbs) {
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ);
+ removeSuccessor(I, NormalizeSuccProbs);
}
MachineBasicBlock::succ_iterator
-MachineBasicBlock::removeSuccessor(succ_iterator I) {
+MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) {
assert(I != Successors.end() && "Not a current successor!");
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(I);
- Weights.erase(WI);
+ // If probability list is empty it means we don't use it (disabled
+ // optimization).
+ if (!Probs.empty()) {
+ probability_iterator WI = getProbabilityIterator(I);
+ Probs.erase(WI);
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
}
(*I)->removePredecessor(this);
@@ -537,74 +584,77 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
}
}
assert(OldI != E && "Old is not a successor of this block");
- Old->removePredecessor(this);
// If New isn't already a successor, let it take Old's place.
if (NewI == E) {
+ Old->removePredecessor(this);
New->addPredecessor(this);
*OldI = New;
return;
}
// New is already a successor.
- // Update its weight instead of adding a duplicate edge.
- if (!Weights.empty()) {
- weight_iterator OldWI = getWeightIterator(OldI);
- *getWeightIterator(NewI) += *OldWI;
- Weights.erase(OldWI);
+ // Update its probability instead of adding a duplicate edge.
+ if (!Probs.empty()) {
+ auto ProbIter = getProbabilityIterator(NewI);
+ if (!ProbIter->isUnknown())
+ *ProbIter += *getProbabilityIterator(OldI);
}
- Successors.erase(OldI);
+ removeSuccessor(OldI);
}
-void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
- Predecessors.push_back(pred);
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
+ Predecessors.push_back(Pred);
}
-void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
- pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred);
assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
Predecessors.erase(I);
}
-void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
- if (this == fromMBB)
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
return;
- while (!fromMBB->succ_empty()) {
- MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t Weight = 0;
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!fromMBB->Weights.empty())
- Weight = *fromMBB->Weights.begin();
+ // If probability list is empty it means we don't use it (disabled optimization).
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
- addSuccessor(Succ, Weight);
- fromMBB->removeSuccessor(Succ);
+ FromMBB->removeSuccessor(Succ);
}
}
void
-MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
- if (this == fromMBB)
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
return;
- while (!fromMBB->succ_empty()) {
- MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t Weight = 0;
- if (!fromMBB->Weights.empty())
- Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, Weight);
- fromMBB->removeSuccessor(Succ);
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+ FromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.getMBB() == fromMBB)
+ if (MO.getMBB() == FromMBB)
MO.setMBB(this);
}
}
+ normalizeSuccProbs();
}
bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
@@ -621,14 +671,14 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
}
bool MachineBasicBlock::canFallThrough() {
- MachineFunction::iterator Fallthrough = this;
+ MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == getParent()->end())
return false;
// If FallthroughBlock isn't a successor, no fallthrough is possible.
- if (!isSuccessor(Fallthrough))
+ if (!isSuccessor(&*Fallthrough))
return false;
// Analyze the branches, if any, at the end of the block.
@@ -666,11 +716,11 @@ MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
- if (Succ->isLandingPad())
+ if (Succ->isEHPad())
return nullptr;
MachineFunction *MF = getParent();
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc DL; // FIXME: this is nowhere
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
@@ -719,7 +769,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LV)
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
- MachineInstr *MI = I;
+ MachineInstr *MI = &*I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
if (!OI->isReg() || OI->getReg() == 0 ||
@@ -739,7 +789,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LIS) {
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
- MachineInstr *MI = I;
+ MachineInstr *MI = &*I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
@@ -761,7 +811,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (Indexes) {
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I)
- Terminators.push_back(I);
+ Terminators.push_back(&*I);
}
updateTerminator();
@@ -770,7 +820,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
SmallVector<MachineInstr*, 4> NewTerminators;
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I)
- NewTerminators.push_back(I);
+ NewTerminators.push_back(&*I);
for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
E = Terminators.end(); I != E; ++I) {
@@ -784,17 +834,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
- MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond,
- dl);
+ TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
if (Indexes) {
for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
I != E; ++I) {
// Some instructions may have been moved to NMBB by updateTerminator(),
// so we first remove any instruction that already has an index.
- if (Indexes->hasIndex(I))
- Indexes->removeMachineInstrFromMaps(I);
- Indexes->insertMachineInstrInMaps(I);
+ if (Indexes->hasIndex(&*I))
+ Indexes->removeMachineInstrFromMaps(&*I);
+ Indexes->insertMachineInstrInMaps(&*I);
}
}
}
@@ -808,9 +857,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
i->getOperand(ni+1).setMBB(NMBB);
// Inherit live-ins from the successor
- for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
- E = Succ->livein_end(); I != E; ++I)
- NMBB->addLiveIn(*I);
+ for (const auto &LI : Succ->liveins())
+ NMBB->addLiveIn(LI);
// Update LiveVariables.
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -822,7 +870,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
- LV->getVarInfo(Reg).Kills.push_back(I);
+ LV->getVarInfo(Reg).Kills.push_back(&*I);
DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
@@ -834,10 +882,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LIS) {
// After splitting the edge and updating SlotIndexes, live intervals may be
// in one of two situations, depending on whether this block was the last in
- // the function. If the original block was the last in the function, all live
- // intervals will end prior to the beginning of the new split block. If the
- // original block was not at the end of the function, all live intervals will
- // extend to the end of the new split block.
+ // the function. If the original block was the last in the function, all
+ // live intervals will end prior to the beginning of the new split block. If
+ // the original block was not at the end of the function, all live intervals
+ // will extend to the end of the new split block.
bool isLastMBB =
std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
@@ -861,7 +909,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LiveInterval &LI = LIS->getInterval(Reg);
VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
- assert(VNI && "PHI sources should be live out of their predecessors.");
+ assert(VNI &&
+ "PHI sources should be live out of their predecessors.");
LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
}
}
@@ -941,7 +990,7 @@ static void unbundleSingleMI(MachineInstr *MI) {
MachineBasicBlock::instr_iterator
MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
- unbundleSingleMI(I);
+ unbundleSingleMI(&*I);
return Insts.erase(I);
}
@@ -964,25 +1013,22 @@ MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
return Insts.insert(I, MI);
}
-/// removeFromParent - This method unlinks 'this' from the containing function,
-/// and returns it, but does not delete it.
+/// This method unlinks 'this' from the containing function, and returns it, but
+/// does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
assert(getParent() && "Not embedded in a function!");
getParent()->remove(this);
return this;
}
-
-/// eraseFromParent - This method unlinks 'this' from the containing function,
-/// and deletes it.
+/// This method unlinks 'this' from the containing function, and deletes it.
void MachineBasicBlock::eraseFromParent() {
assert(getParent() && "Not embedded in a function!");
getParent()->erase(this);
}
-
-/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
-/// 'Old', change the code and CFG so that it branches to 'New' instead.
+/// Given a machine basic block that branched to 'Old', change the code and CFG
+/// so that it branches to 'New' instead.
void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
@@ -1004,46 +1050,44 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
replaceSuccessor(Old, New);
}
-/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
-/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
-/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
-/// null.
+/// Various pieces of code can cause excess edges in the CFG to be inserted. If
+/// we have proven that MBB can only branch to DestA and DestB, remove any other
+/// MBB successors from the CFG. DestA and DestB can be null.
///
/// Besides DestA and DestB, retain other edges leading to LandingPads
/// (currently there can be only one; we don't check or require that here).
/// Note it is possible that DestA and/or DestB are LandingPads.
bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock *DestB,
- bool isCond) {
+ bool IsCond) {
// The values of DestA and DestB frequently come from a call to the
// 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
// values from there.
//
// 1. If both DestA and DestB are null, then the block ends with no branches
// (it falls through to its successor).
- // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends
// with only an unconditional branch.
- // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends
// with a conditional branch that falls through to a successor (DestB).
- // 4. If DestA and DestB is set and isCond is true, then the block ends with a
+ // 4. If DestA and DestB is set and IsCond is true, then the block ends with a
// conditional branch followed by an unconditional branch. DestA is the
// 'true' destination and DestB is the 'false' destination.
bool Changed = false;
- MachineFunction::iterator FallThru =
- std::next(MachineFunction::iterator(this));
+ MachineFunction::iterator FallThru = std::next(getIterator());
if (!DestA && !DestB) {
// Block falls through to successor.
- DestA = FallThru;
- DestB = FallThru;
+ DestA = &*FallThru;
+ DestB = &*FallThru;
} else if (DestA && !DestB) {
- if (isCond)
+ if (IsCond)
// Block ends in conditional jump that falls through to successor.
- DestB = FallThru;
+ DestB = &*FallThru;
} else {
- assert(DestA && DestB && isCond &&
+ assert(DestA && DestB && IsCond &&
"CFG in a bad state. Cannot correct CFG edges");
}
@@ -1054,7 +1098,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
if (!SeenMBBs.insert(MBB).second ||
- (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ (MBB != DestA && MBB != DestB && !MBB->isEHPad())) {
// This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
Changed = true;
@@ -1063,11 +1107,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
}
}
+ if (Changed)
+ normalizeSuccProbs();
return Changed;
}
-/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
-/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
@@ -1083,40 +1129,55 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return DL;
}
-/// getSuccWeight - Return weight of the edge from this block to MBB.
-///
-uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
- if (Weights.empty())
- return 0;
-
- return *getWeightIterator(Succ);
+/// Return probability of the edge from this block to MBB.
+BranchProbability
+MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
+ if (Probs.empty())
+ return BranchProbability(1, succ_size());
+
+ const auto &Prob = *getProbabilityIterator(Succ);
+ if (Prob.isUnknown()) {
+ // For unknown probabilities, collect the sum of all known ones, and evenly
+    // distribute the complement of the sum to each unknown probability.
+ unsigned KnownProbNum = 0;
+ auto Sum = BranchProbability::getZero();
+ for (auto &P : Probs) {
+ if (!P.isUnknown()) {
+ Sum += P;
+ KnownProbNum++;
+ }
+ }
+ return Sum.getCompl() / (Probs.size() - KnownProbNum);
+ } else
+ return Prob;
}
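A standalone sketch of the arithmetic in getSuccProbability, using plain doubles instead of llvm::BranchProbability (the probabilities are made up): known probabilities are kept and the complement of their sum is split evenly among the unknowns.

    #include <cstdio>
    #include <optional>
    #include <vector>

    int main() {
      // Hypothetical block with three successors; only the first probability
      // is known (1/2), the other two are "unknown".
      std::vector<std::optional<double>> Probs = {0.5, std::nullopt, std::nullopt};
      double Sum = 0.0;
      unsigned Known = 0;
      for (const auto &P : Probs)
        if (P) { Sum += *P; ++Known; }
      double Each = (1.0 - Sum) / (Probs.size() - Known); // 0.25 for each unknown
      for (const auto &P : Probs)
        std::printf("%f\n", P ? *P : Each); // 0.5, 0.25, 0.25
      return 0;
    }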
-/// Set successor weight of a given iterator.
-void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) {
- if (Weights.empty())
+/// Set successor probability of a given iterator.
+void MachineBasicBlock::setSuccProbability(succ_iterator I,
+ BranchProbability Prob) {
+ assert(!Prob.isUnknown());
+ if (Probs.empty())
return;
- *getWeightIterator(I) = weight;
+ *getProbabilityIterator(I) = Prob;
}
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::succ_iterator I) {
- assert(Weights.size() == Successors.size() && "Async weight list!");
- size_t index = std::distance(Successors.begin(), I);
- assert(index < Weights.size() && "Not a current successor!");
- return Weights.begin() + index;
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::const_probability_iterator
+MachineBasicBlock::getProbabilityIterator(
+ MachineBasicBlock::const_succ_iterator I) const {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
}
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::const_weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
- assert(Weights.size() == Successors.size() && "Async weight list!");
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::probability_iterator
+MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
const size_t index = std::distance(Successors.begin(), I);
- assert(index < Weights.size() && "Not a current successor!");
- return Weights.begin() + index;
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
}
/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
@@ -1138,33 +1199,33 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
do {
--I;
- MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.Defines)
- // Outputs happen after inputs so they take precedence if both are
- // present.
- return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+ // Defs happen after uses so they take precedence if both are present.
- if (Analysis.Kills || Analysis.Clobbers)
- // Register killed, so isn't live.
+ // Register is dead after a dead def of the full register.
+ if (Info.DeadDef)
return LQR_Dead;
-
- else if (Analysis.ReadsOverlap)
- // Defined or read without a previous kill - live.
- return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
-
+ // Register is (at least partially) live after a def.
+ if (Info.Defined)
+ return LQR_Live;
+ // Register is dead after a full kill or clobber and no def.
+ if (Info.Killed || Info.Clobbered)
+ return LQR_Dead;
+ // Register must be live if we read it.
+ if (Info.Read)
+ return LQR_Live;
} while (I != begin() && --N > 0);
}
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
- for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
- RAI.isValid(); ++RAI) {
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
+ ++RAI)
if (isLiveIn(*RAI))
- return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
- }
+ return LQR_Live;
return LQR_Dead;
}
@@ -1176,16 +1237,14 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// If this is the last insn in the block, don't search forwards.
if (I != end()) {
for (++I; I != end() && N > 0; ++I, --N) {
- MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.ReadsOverlap)
- // Used, therefore must have been live.
- return (Analysis.Reads) ?
- LQR_Live : LQR_OverlappingLive;
-
- else if (Analysis.Clobbers || Analysis.Defines)
- // Defined (but not read) therefore cannot have been live.
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
return LQR_Dead;
}
}
@@ -1193,3 +1252,17 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
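The backward-scan precedence above (dead full def, then def, then kill/clobber, then read) can be summarized in a small standalone sketch; the struct below only mimics the PhysRegInfo fields that the loop consults and is not the real LLVM type.

    #include <cstdio>

    struct PhysRegInfoSketch { // stand-in for the fields used above
      bool DeadDef, Defined, Killed, Clobbered, Read;
    };

    enum Liveness { Dead, Live, Unknown };

    Liveness classify(const PhysRegInfoSketch &Info) {
      if (Info.DeadDef)  return Dead;   // dead def of the full register
      if (Info.Defined)  return Live;   // (at least partially) defined
      if (Info.Killed || Info.Clobbered) return Dead;
      if (Info.Read)     return Live;
      return Unknown;                   // says nothing, keep scanning
    }

    int main() {
      std::printf("%d\n", classify({false, true, false, false, false})); // 1 == Live
      return 0;
    }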
+
+const uint32_t *
+MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const {
+ // EH funclet entry does not preserve any registers.
+ return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr;
+}
+
+const uint32_t *
+MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
+ // If we see a return block with successors, this must be a funclet return,
+ // which does not preserve any registers. If there are no successors, we don't
+ // care what kind of return it is, putting a mask after it is a no-op.
+  // care what kind of return it is; putting a mask after it is a no-op.
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9151d99..9119e31 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -57,7 +57,7 @@ struct GraphTraits<MachineBlockFrequencyInfo *> {
static inline
const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return &G->getFunction()->front();
}
static ChildIteratorType child_begin(const NodeType *N) {
@@ -143,7 +143,7 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
if (!MBFI)
MBFI.reset(new ImplType);
- MBFI->doFunction(&F, &MBPI, &MLI);
+ MBFI->calculate(F, MBPI, MLI);
#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
view();
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2969bad..f5e3056 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -51,7 +51,7 @@ using namespace llvm;
#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
-STATISTIC(NumUncondBranches, "Number of uncondittional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
STATISTIC(CondBranchTakenFreq,
"Potential frequency of taking conditional branches");
STATISTIC(UncondBranchTakenFreq,
@@ -62,6 +62,11 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
"blocks in the function."),
cl::init(0), cl::Hidden);
+static cl::opt<unsigned>
+ AlignAllLoops("align-all-loops",
+ cl::desc("Force the alignment of all loops in the function."),
+ cl::init(0), cl::Hidden);
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
"block-placement-exit-block-bias",
@@ -81,6 +86,29 @@ static cl::opt<unsigned> OutlineOptionalThreshold(
"instruction count below this threshold"),
cl::init(4), cl::Hidden);
+static cl::opt<unsigned> LoopToColdBlockRatio(
+ "loop-to-cold-block-ratio",
+ cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
+ "(frequency of block) is greater than this ratio"),
+ cl::init(5), cl::Hidden);
+
+static cl::opt<bool>
+ PreciseRotationCost("precise-rotation-cost",
+ cl::desc("Model the cost of loop rotation more "
+ "precisely by using profile data."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> MisfetchCost(
+ "misfetch-cost",
+    cl::desc("Cost that models the probabilistic risk of an instruction "
+             "misfetch due to a jump compared to falling through, whose cost "
+ "is zero."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
+ cl::desc("Cost of jump instructions."),
+ cl::init(1), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -246,9 +274,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains(MachineFunction &F);
public:
@@ -354,31 +385,56 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
- // FIXME: Due to the performance of the probability and weight routines in
- // the MBPI analysis, we manually compute probabilities using the edge
- // weights. This is suboptimal as it means that the somewhat subtle
- // definition of edge weight semantics is encoded here as well. We should
- // improve the MBPI interface to efficiently support query patterns such as
- // this.
- uint32_t BestWeight = 0;
- uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ auto BestProb = BranchProbability::getZero();
+
+  // Adjust edge probabilities by excluding edges pointing to blocks that are
+  // either not in BlockFilter or are already in the current chain. Consider the
+ // following CFG:
+ //
+ // --->A
+ // | / \
+ // | B C
+ // | \ / \
+ // ----D E
+ //
+ // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+ // A->C is chosen as a fall-through, D won't be selected as a successor of C
+ // due to CFG constraint (the probability of C->D is not greater than
+ // HotProb). If we exclude E that is not in BlockFilter when calculating the
+ // probability of C->D, D will be selected and we will get A C D B as the
+ // layout of this loop.
+ auto AdjustedSumProb = BranchProbability::getOne();
+ SmallVector<MachineBasicBlock *, 4> Successors;
for (MachineBasicBlock *Succ : BB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
- continue;
- }
- if (Succ != *SuccChain.begin()) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
- continue;
+ bool SkipSucc = false;
+ if (BlockFilter && !BlockFilter->count(Succ)) {
+ SkipSucc = true;
+ } else {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(Succ)
+ << " -> Already merged!\n");
+ SkipSucc = true;
+ } else if (Succ != *SuccChain->begin()) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
+ continue;
+ }
}
+ if (SkipSucc)
+ AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
+ else
+ Successors.push_back(Succ);
+ }
- uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
- BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock *Succ : Successors) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
// If we outline optional branches, look whether Succ is unavoidable, i.e.
// dominates all terminators of the MachineFunction. If it does, other
@@ -406,6 +462,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
+ BlockChain &SuccChain = *BlockToChain[Succ];
if (SuccChain.LoopPredecessors != 0) {
if (SuccProb < HotProb) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
@@ -415,8 +472,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Make sure that a hot successor doesn't have a globally more
// important predecessor.
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BlockFrequency CandidateEdgeFreq =
- MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
@@ -440,10 +498,10 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
<< " (prob)"
<< (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestWeight >= SuccWeight)
+ if (BestSucc && BestProb >= SuccProb)
continue;
BestSucc = Succ;
- BestWeight = SuccWeight;
+ BestProb = SuccProb;
}
return BestSucc;
}
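A standalone sketch of the adjusted-probability computation above, with plain 32-bit integers standing in for BranchProbability numerators (the 1 << 31 scale and the edge percentages are assumptions for illustration): a surviving edge's probability is its numerator over the adjusted sum, saturating at one.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t One = 1u << 31;          // stand-in for getOne()'s numerator
      uint32_t AdjustedSum = One;
      // Hypothetical edges: 60% skipped (already merged), 30% and 10% remaining.
      uint32_t Skipped = (uint32_t)(0.6 * One);
      AdjustedSum -= Skipped;
      uint32_t SuccProbN = (uint32_t)(0.3 * One);
      uint32_t SuccProbD = AdjustedSum;
      double SuccProb = SuccProbN >= SuccProbD ? 1.0 : (double)SuccProbN / SuccProbD;
      std::printf("adjusted probability = %f\n", SuccProb); // ~0.75
      return 0;
    }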
@@ -505,14 +563,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
const BlockFilterSet *BlockFilter) {
for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
++I) {
- if (BlockFilter && !BlockFilter->count(I))
+ if (BlockFilter && !BlockFilter->count(&*I))
continue;
- if (BlockToChain[I] != &PlacedChain) {
+ if (BlockToChain[&*I] != &PlacedChain) {
PrevUnplacedBlockIt = I;
// Now select the head of the chain to which the unplaced block belongs
// as the block to place. This will force the entire chain to be placed,
// and satisfies the requirements of merging chains.
- return *BlockToChain[I]->begin();
+ return *BlockToChain[&*I]->begin();
}
}
return nullptr;
@@ -672,13 +730,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
MachineBasicBlock *OldExitingBB = ExitingBB;
BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
bool HasLoopingSucc = false;
- // FIXME: Due to the performance of the probability and weight routines in
- // the MBPI analysis, we use the internal weights and manually compute the
- // probabilities to avoid quadratic behavior.
- uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
for (MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ->isLandingPad())
+ if (Succ->isEHPad())
continue;
if (Succ == MBB)
continue;
@@ -690,10 +743,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
if (LoopBlockSet.count(Succ)) {
DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " (" << SuccWeight << ")\n");
+ << getBlockName(Succ) << " (" << SuccProb << ")\n");
HasLoopingSucc = true;
continue;
}
@@ -705,7 +758,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
BlocksExitingToOuterLoop.insert(MBB);
}
- BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
<< getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
@@ -791,6 +843,188 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
+/// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
+///
+/// With profile data, we can determine the cost in terms of missed fall through
+/// opportunities when rotating a loop chain and select the best rotation.
+/// Basically, there are three kinds of cost to consider for each rotation:
+/// 1. The possibly missed fall through edge (if it exists) from BB out of
+/// the loop to the loop header.
+/// 2. The possibly missed fall through edges (if they exist) from the loop
+/// exits to BB out of the loop.
+/// 3. The missed fall through edge (if it exists) from the last BB to the
+/// first BB in the loop chain.
+/// Therefore, the cost for a given rotation is the sum of costs listed above.
+/// We select the best rotation with the smallest cost.
+void MachineBlockPlacement::rotateLoopWithProfile(
+ BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ auto HeaderBB = L.getHeader();
+ auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
+ auto RotationPos = LoopChain.end();
+
+ BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
+
+ // A utility lambda that scales up a block frequency by dividing it by a
+ // branch probability which is the reciprocal of the scale.
+ auto ScaleBlockFrequency = [](BlockFrequency Freq,
+ unsigned Scale) -> BlockFrequency {
+ if (Scale == 0)
+ return 0;
+ // Use operator / between BlockFrequency and BranchProbability to implement
+ // saturating multiplication.
+ return Freq / BranchProbability(1, Scale);
+ };
+
+ // Compute the cost of the missed fall-through edge to the loop header if the
+  // chain head is not the loop header. As we only consider natural loops with a
+  // single header, this computation can be done only once.
+ BlockFrequency HeaderFallThroughCost(0);
+ for (auto *Pred : HeaderBB->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ auto EdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
+ // If the predecessor has only an unconditional jump to the header, we
+ // need to consider the cost of this jump.
+ if (Pred->succ_size() == 1)
+ FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
+ HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
+ }
+ }
+
+ // Here we collect all exit blocks in the loop, and for each exit we find out
+ // its hottest exit edge. For each loop rotation, we define the loop exit cost
+ // as the sum of frequencies of exit edges we collect here, excluding the exit
+ // edge from the tail of the loop chain.
+ SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
+ for (auto BB : LoopChain) {
+ auto LargestExitEdgeProb = BranchProbability::getZero();
+ for (auto *Succ : BB->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
+ LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
+ }
+ }
+ if (LargestExitEdgeProb > BranchProbability::getZero()) {
+ auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
+ ExitsWithFreq.emplace_back(BB, ExitFreq);
+ }
+ }
+
+ // In this loop we iterate every block in the loop chain and calculate the
+ // cost assuming the block is the head of the loop chain. When the loop ends,
+ // we should have found the best candidate as the loop chain's head.
+ for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
+ EndIter = LoopChain.end();
+ Iter != EndIter; Iter++, TailIter++) {
+ // TailIter is used to track the tail of the loop chain if the block we are
+ // checking (pointed by Iter) is the head of the chain.
+ if (TailIter == LoopChain.end())
+ TailIter = LoopChain.begin();
+
+ auto TailBB = *TailIter;
+
+ // Calculate the cost by putting this BB to the top.
+ BlockFrequency Cost = 0;
+
+ // If the current BB is the loop header, we need to take into account the
+ // cost of the missed fall through edge from outside of the loop to the
+ // header.
+ if (Iter != HeaderIter)
+ Cost += HeaderFallThroughCost;
+
+ // Collect the loop exit cost by summing up frequencies of all exit edges
+ // except the one from the chain tail.
+ for (auto &ExitWithFreq : ExitsWithFreq)
+ if (TailBB != ExitWithFreq.first)
+ Cost += ExitWithFreq.second;
+
+  // The cost of breaking the fall-through edge (if it exists) from the tail to
+  // the top of the loop chain. Here we need to consider three cases:
+ // 1. If the tail node has only one successor, then we will get an
+ // additional jmp instruction. So the cost here is (MisfetchCost +
+ // JumpInstCost) * tail node frequency.
+ // 2. If the tail node has two successors, then we may still get an
+ // additional jmp instruction if the layout successor after the loop
+ // chain is not its CFG successor. Note that the more frequently executed
+ // jmp instruction will be put ahead of the other one. Assume the
+ // frequency of those two branches are x and y, where x is the frequency
+ // of the edge to the chain head, then the cost will be
+  //    (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency.
+ // 3. If the tail node has more than two successors (this rarely happens),
+ // we won't consider any additional cost.
+ if (TailBB->isSuccessor(*Iter)) {
+ auto TailBBFreq = MBFI->getBlockFreq(TailBB);
+ if (TailBB->succ_size() == 1)
+ Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
+ MisfetchCost + JumpInstCost);
+ else if (TailBB->succ_size() == 2) {
+ auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
+ auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
+ auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
+ ? TailBBFreq * TailToHeadProb.getCompl()
+ : TailToHeadFreq;
+ Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
+ ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
+ }
+ }
+
+ DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter)
+ << " to the top: " << Cost.getFrequency() << "\n");
+
+ if (Cost < SmallestRotationCost) {
+ SmallestRotationCost = Cost;
+ RotationPos = Iter;
+ }
+ }
+
+ if (RotationPos != LoopChain.end()) {
+ DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos)
+ << " to the top\n");
+ std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
+ }
+}
+
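As a worked example of the tail-cost formula in case 2 above (all numbers are made up; MisfetchCost and JumpInstCost use their default value of 1):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const double MisfetchCost = 1.0, JumpInstCost = 1.0; // default flag values
      double TailFreq = 100.0;        // hypothetical frequency of the tail block
      double TailToHeadProb = 0.7;    // edge back to the candidate chain head
      double OtherProb = 0.3;
      double TailToHeadFreq = TailFreq * TailToHeadProb;                      // 70
      double ColderEdgeFreq = TailFreq * std::min(TailToHeadProb, OtherProb); // 30
      double Cost = TailToHeadFreq * MisfetchCost + ColderEdgeFreq * JumpInstCost;
      std::printf("tail rotation cost = %f\n", Cost); // 100
      return 0;
    }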
+/// \brief Collect blocks in the given loop that are to be placed.
+///
+/// When profile data is available, exclude cold blocks from the returned set;
+/// otherwise, collect all blocks in the loop.
+MachineBlockPlacement::BlockFilterSet
+MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
+ BlockFilterSet LoopBlockSet;
+
+ // Filter cold blocks off from LoopBlockSet when profile data is available.
+ // Collect the sum of frequencies of incoming edges to the loop header from
+ // outside. If we treat the loop as a super block, this is the frequency of
+ // the loop. Then for each block in the loop, we calculate the ratio between
+ // its frequency and the frequency of the loop block. When it is too small,
+  // the loop. Then for each block in the loop, we calculate the ratio between
+  // its frequency and the frequency of the loop. When this ratio is too small,
+ // cold anymore. This needs precise profile data and we only do this when
+ // profile data is available.
+ if (F.getFunction()->getEntryCount()) {
+ BlockFrequency LoopFreq(0);
+ for (auto LoopPred : L.getHeader()->predecessors())
+ if (!L.contains(LoopPred))
+ LoopFreq += MBFI->getBlockFreq(LoopPred) *
+ MBPI->getEdgeProbability(LoopPred, L.getHeader());
+
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
+ if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
+ continue;
+ LoopBlockSet.insert(LoopBB);
+ }
+ } else
+ LoopBlockSet.insert(L.block_begin(), L.block_end());
+
+ return LoopBlockSet;
+}
+
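A small sketch, with assumed frequencies, of the cold-block filter just described: a loop block is outlined when (loop frequency) / (block frequency) exceeds LoopToColdBlockRatio.

    #include <cstdint>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      const uint64_t LoopToColdBlockRatio = 5; // default from the flag above
      uint64_t LoopFreq = 1000; // hypothetical sum of incoming-edge frequencies
      std::vector<std::pair<const char *, uint64_t>> Blocks = {
          {"header", 1000}, {"body", 900}, {"cold.path", 150}};
      for (const auto &B : Blocks) {
        bool Cold = B.second == 0 || LoopFreq / B.second > LoopToColdBlockRatio;
        std::printf("%s: %s\n", B.first, Cold ? "outlined" : "kept in loop chain");
      }
      return 0;
    }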
/// \brief Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
@@ -805,19 +1039,27 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
- BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
+
+ // Check if we have profile data for this function. If yes, we will rotate
+ // this loop by modeling costs more precisely which requires the profile data
+ // for better layout.
+ bool RotateLoopWithProfile =
+ PreciseRotationCost && F.getFunction()->getEntryCount();
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+ // When we use profile data to rotate the loop, this is unnecessary.
+ MachineBasicBlock *LoopTop =
+ RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
MachineBasicBlock *ExitingBB = nullptr;
- if (LoopTop == L.getHeader())
+ if (!RotateLoopWithProfile && LoopTop == L.getHeader())
ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -828,7 +1070,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+
+ for (MachineBasicBlock *LoopBB : LoopBlockSet) {
BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
@@ -848,7 +1091,11 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
}
buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
- rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+
+ if (RotateLoopWithProfile)
+ rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
+ else
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -889,7 +1136,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// the assumptions of the remaining algorithm.
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = FI;
+ MachineBasicBlock *BB = &*FI;
BlockChain *Chain =
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
@@ -900,8 +1147,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
- MachineFunction::iterator NextFI(std::next(FI));
- MachineBasicBlock *NextBB = NextFI;
+ MachineFunction::iterator NextFI = std::next(FI);
+ MachineBasicBlock *NextBB = &*NextFI;
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility.
assert(NextFI != FE && "Can't fallthrough past the last block.");
@@ -1004,7 +1251,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Update the terminator of the previous block.
if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
+ MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -1035,14 +1282,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
// If PrevBB has a two-way branch, try to re-order the branches
- // such that we branch to the successor with higher weight first.
+ // such that we branch to the successor with higher probability first.
if (TBB && !Cond.empty() && FBB &&
- MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ MBPI->getEdgeProbability(PrevBB, FBB) >
+ MBPI->getEdgeProbability(PrevBB, TBB) &&
!TII->ReverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
<< getBlockName(PrevBB) << "\n");
- DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
- << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DEBUG(dbgs() << " Edge probability: "
+ << MBPI->getEdgeProbability(PrevBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(PrevBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
@@ -1064,13 +1313,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
+ // FIXME: Use Function::optForSize().
if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
- BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
for (MachineBasicBlock *ChainBB : FunctionChain) {
if (ChainBB == *FunctionChain.begin())
@@ -1084,6 +1334,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!L)
continue;
+ if (AlignAllLoops) {
+ ChainBB->setAlignment(AlignAllLoops);
+ continue;
+ }
+
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
continue; // Don't care about loop alignment.
@@ -1224,4 +1479,3 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
return false;
}
-
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 6fbc2be..cf6d401 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -28,91 +28,48 @@ char MachineBranchProbabilityInfo::ID = 0;
void MachineBranchProbabilityInfo::anchor() { }
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
- // First we compute the sum with 64-bits of precision, ensuring that cannot
- // overflow by bounding the number of weights considered. Hopefully no one
- // actually needs 2^32 successors.
- assert(MBB->succ_size() < UINT32_MAX);
- uint64_t Sum = 0;
- Scale = 1;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- Sum += Weight;
- }
-
- // If the computed sum fits in 32-bits, we're done.
- if (Sum <= UINT32_MAX)
- return Sum;
-
- // Otherwise, compute the scale necessary to cause the weights to fit, and
- // re-sum with that scale applied.
- assert((Sum / UINT32_MAX) < UINT32_MAX);
- Scale = (Sum / UINT32_MAX) + 1;
- Sum = 0;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- Sum += Weight / Scale;
- }
- assert(Sum <= UINT32_MAX);
- return Sum;
-}
-
-uint32_t MachineBranchProbabilityInfo::
-getEdgeWeight(const MachineBasicBlock *Src,
- MachineBasicBlock::const_succ_iterator Dst) const {
- uint32_t Weight = Src->getSuccWeight(Dst);
- if (!Weight)
- return DEFAULT_WEIGHT;
- return Weight;
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ return Src->getSuccProbability(Dst);
}
-uint32_t MachineBranchProbabilityInfo::
-getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
// This is a linear search. Try to use the const_succ_iterator version when
// possible.
- return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+ return getEdgeProbability(Src,
+ std::find(Src->succ_begin(), Src->succ_end(), Dst));
}
bool
MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- // FIXME: Compare against a static "hot" BranchProbability.
- return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+ static BranchProbability HotProb(4, 5);
+ return getEdgeProbability(Src, Dst) > HotProb;
}
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- uint32_t MaxWeight = 0;
+ auto MaxProb = BranchProbability::getZero();
MachineBasicBlock *MaxSucc = nullptr;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- if (Weight > MaxWeight) {
- MaxWeight = Weight;
+ auto Prob = getEdgeProbability(MBB, I);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
MaxSucc = *I;
}
}
- if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
+ static BranchProbability HotProb(4, 5);
+ if (getEdgeProbability(MBB, MaxSucc) >= HotProb)
return MaxSucc;
return nullptr;
}
-BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
- const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
- uint32_t Scale = 1;
- uint32_t D = getSumForBlock(Src, Scale);
- uint32_t N = getEdgeWeight(Src, Dst) / Scale;
-
- return BranchProbability(N, D);
-}
-
raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
raw_ostream &OS, const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 87aaaa0..021707b 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -57,7 +57,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -111,7 +111,7 @@ char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
@@ -714,7 +714,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
return PerformCSE(DT->getRootNode());
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index f33d0e6..fa43c4d 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -10,6 +10,7 @@
// The machine combiner pass uses machine trace metrics to ensure the combined
// instructions does not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "machine-combiner"
#include "llvm/ADT/Statistic.h"
@@ -68,10 +69,10 @@ private:
MachineTraceMetrics::Trace BlockTrace);
bool
improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
- MachineTraceMetrics::Trace BlockTrace,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- bool NewCodeHasLessInsts);
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -122,9 +123,9 @@ unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineTraceMetrics::Trace BlockTrace) {
-
SmallVector<unsigned, 16> InstrDepth;
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
@@ -180,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
/// \returns Latency of \p NewRoot
unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace) {
-
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
@@ -202,62 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
UseMO->findRegisterUseOperandIdx(MO.getReg()));
} else {
- LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
}
NewRootLatency = std::max(NewRootLatency, LatencyOp);
}
return NewRootLatency;
}
-/// True when the new instruction sequence does not lengthen the critical path
-/// and the new sequence has less instructions or the new sequence improves the
-/// critical path.
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+ MustReduceDepth, // The data dependency chain must be improved.
+ Default // The critical path must not be lengthened.
+};
+
+static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+ // TODO: If C++ ever gets a real enum class, make this part of the
+ // MachineCombinerPattern class.
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return CombinerObjective::Default;
+ }
+}
+
/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
-/// path. This is decided by the formula:
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
-/// If the new sequence has an equal length critical path but does not reduce
-/// the number of instructions (NewCodeHasLessInsts is false), then it is not
-/// considered an improvement. The slack is the number of cycles Root can be
-/// delayed before the critical patch becomes longer.
+/// path. The definition of "improve" may be restricted by specifying that the
+/// new path improves the data dependency chain (MustReduceDepth).
bool MachineCombiner::improvesCriticalPathLen(
MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- bool NewCodeHasLessInsts) {
-
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ MachineCombinerPattern Pattern) {
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
- // Get depth and latency of NewRoot.
unsigned NewRootIdx = InsInstrs.size() - 1;
MachineInstr *NewRoot = InsInstrs[NewRootIdx];
- unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
- unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- // Get depth, latency and slack of Root.
+ // Get depth and latency of NewRoot and Root.
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
+ dbgs() << " RootDepth: " << RootDepth << "\n");
+
+ // For a transform such as reassociation, the cost equation is
+ // conservatively calculated so that we must improve the depth (data
+ // dependency cycles) in the critical path to proceed with the transform.
+ // Being conservative also protects against inaccuracies in the underlying
+ // machine trace metrics and CPU models.
+ if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
+ return NewRootDepth < RootDepth;
+
+ // A more flexible cost calculation for the critical path includes the slack
+ // of the original code sequence. This may allow the transform to proceed
+ // even if the instruction depths (data dependency cycles) become worse.
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
unsigned RootSlack = BlockTrace.getInstrSlack(Root);
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
- dbgs() << " NewRootDepth: " << NewRootDepth
- << " NewRootLatency: " << NewRootLatency << "\n";
- dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
- << " RootSlack: " << RootSlack << "\n";
- dbgs() << " NewRootDepth + NewRootLatency "
+ DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootLatency: " << RootLatency << "\n";
+ dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency = "
<< NewRootDepth + NewRootLatency << "\n";
- dbgs() << " RootDepth + RootLatency + RootSlack "
+ dbgs() << " RootDepth + RootLatency + RootSlack = "
<< RootDepth + RootLatency + RootSlack << "\n";);
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
- if (NewCodeHasLessInsts)
- return NewCycleCount <= OldCycleCount;
- else
- return NewCycleCount < OldCycleCount;
+ return NewCycleCount <= OldCycleCount;
}
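A hedged arithmetic sketch of the critical-path test above, with made-up cycle counts: the alternative sequence is accepted when NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack.

    #include <cstdio>

    int main() {
      unsigned NewRootDepth = 6, NewRootLatency = 3;          // new sequence
      unsigned RootDepth = 5, RootLatency = 4, RootSlack = 1; // original root
      bool Improves =
          NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack;
      std::printf("%u <= %u -> %s\n", NewRootDepth + NewRootLatency,
                  RootDepth + RootLatency + RootSlack,
                  Improves ? "accept" : "reject"); // 9 <= 10 -> accept
      return 0;
    }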
/// helper routine to convert instructions into SC
@@ -271,11 +296,14 @@ void MachineCombiner::instr2instrSC(
InstrsSC.push_back(SC);
}
}
+
/// True when the new instructions do not increase resource length
bool MachineCombiner::preservesResourceLen(
MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs) {
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
// Compute current resource length
@@ -310,7 +338,7 @@ bool MachineCombiner::preservesResourceLen(
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
if (OptSize && (NewSize < OldSize))
return true;
- if (!TSchedModel.hasInstrSchedModel())
+ if (!TSchedModel.hasInstrSchedModelOrItineraries())
return true;
return false;
}
@@ -332,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
auto &MI = *BlockIter++;
DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
- SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Patterns;
+ SmallVector<MachineCombinerPattern, 16> Patterns;
// The motivating example is:
//
// MUL Other MUL_op1 MUL_op2 Other
@@ -358,54 +386,55 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// mostly one pattern, and getMachineCombinerPatterns() can order patterns
// based on an internal cost heuristic.
- if (TII->getMachineCombinerPatterns(MI, Patterns)) {
- for (auto P : Patterns) {
- SmallVector<MachineInstr *, 16> InsInstrs;
- SmallVector<MachineInstr *, 16> DelInstrs;
- DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ if (!TII->getMachineCombinerPatterns(MI, Patterns))
+ continue;
+
+ for (auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ unsigned NewInstCount = InsInstrs.size();
+ unsigned OldInstCount = DelInstrs.size();
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!NewInstCount)
+ continue;
+
+ // Substitute when we optimize for codesize and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lengthens the critical path nor increases
+ // resource pressure.
+ if (doSubstitute(NewInstCount, OldInstCount) ||
+ (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
Traces->verifyAnalysis();
- TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
- InstrIdxForVirtReg);
- unsigned NewInstCount = InsInstrs.size();
- unsigned OldInstCount = DelInstrs.size();
- // Found pattern, but did not generate alternative sequence.
- // This can happen e.g. when an immediate could not be materialized
- // in a single instruction.
- if (!NewInstCount)
- continue;
- // Substitute when we optimize for codesize and the new sequence has
- // fewer instructions OR
- // the new sequence neither lengthens the critical path nor increases
- // resource pressure.
- if (doSubstitute(NewInstCount, OldInstCount) ||
- (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- InstrIdxForVirtReg,
- NewInstCount < OldInstCount) &&
- preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
- for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-
- Changed = true;
- ++NumInstCombined;
-
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
- // Eagerly stop after the first pattern fires.
- break;
- } else {
- // Cleanup instructions of the alternative code sequence. There is no
- // use for them.
- MachineFunction *MF = MBB->getParent();
- for (auto *InstrPtr : InsInstrs)
- MF->DeleteMachineInstr(InstrPtr);
- }
- InstrIdxForVirtReg.clear();
+ // Eagerly stop after the first pattern fires.
+ break;
+ } else {
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ MachineFunction *MF = MBB->getParent();
+ for (auto *InstrPtr : InsInstrs)
+ MF->DeleteMachineInstr(InstrPtr);
}
+ InstrIdxForVirtReg.clear();
}
}
@@ -420,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
TSchedModel.init(SchedModel, &STI, TII);
MRI = &MF.getRegInfo();
Traces = &getAnalysis<MachineTraceMetrics>();
- MinInstr = 0;
-
- OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ MinInstr = nullptr;
+ OptSize = MF.getFunction()->optForSize();
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index 9856e70..ca4bb1c 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionInitializer.h"
@@ -26,6 +27,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -44,6 +47,11 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
+static cl::opt<unsigned>
+ AlignAllFunctions("align-all-functions",
+ cl::desc("Force the alignment of all functions."),
+ cl::init(0), cl::Hidden);
+
void MachineFunctionInitializer::anchor() {}
//===----------------------------------------------------------------------===//
@@ -79,12 +87,27 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ // FIXME: Use Function::optForSize().
if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
+ if (AlignAllFunctions)
+ Alignment = AlignAllFunctions;
+
FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
+
+ if (isFuncletEHPersonality(classifyEHPersonality(
+ F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) {
+ WinEHInfo = new (Allocator) WinEHFuncInfo();
+ }
+
+ assert(TM.isCompatibleDataLayout(getDataLayout()) &&
+ "Can't create a MachineFunction using a Module with a "
+ "Target-incompatible DataLayout attached\n");
+
+ PSVManager = llvm::make_unique<PseudoSourceValueManager>();
}
MachineFunction::~MachineFunction() {
@@ -117,6 +140,11 @@ MachineFunction::~MachineFunction() {
JumpTableInfo->~MachineJumpTableInfo();
Allocator.Deallocate(JumpTableInfo);
}
+
+ if (WinEHInfo) {
+ WinEHInfo->~WinEHFuncInfo();
+ Allocator.Deallocate(WinEHInfo);
+ }
}
const DataLayout &MachineFunction::getDataLayout() const {
@@ -149,7 +177,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (MBB == nullptr)
MBBI = begin();
else
- MBBI = MBB;
+ MBBI = MBB->getIterator();
// Figure out the block number this should have.
unsigned BlockNo = 0;
@@ -169,7 +197,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (MBBNumbering[BlockNo])
MBBNumbering[BlockNo]->setNumber(-1);
- MBBNumbering[BlockNo] = MBBI;
+ MBBNumbering[BlockNo] = &*MBBI;
MBBI->setNumber(BlockNo);
}
}
@@ -322,6 +350,13 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
return std::make_pair(Result, Result + Num);
}
+const char *MachineFunction::createExternalSymbolName(StringRef Name) {
+ char *Dest = Allocator.Allocate<char>(Name.size() + 1);
+ std::copy(Name.begin(), Name.end(), Dest);
+ Dest[Name.size()] = 0;
+ return Dest;
+}
+
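A standalone sketch of what createExternalSymbolName does: copy the name into storage owned by the function and return a NUL-terminated pointer that stays valid (std::list stands in for the BumpPtrAllocator; the symbol name is hypothetical).

    #include <cstdio>
    #include <list>
    #include <string>

    const char *createExternalSymbolNameSketch(std::list<std::string> &Storage,
                                               const std::string &Name) {
      Storage.push_back(Name);       // owned copy; list nodes never move
      return Storage.back().c_str(); // NUL-terminated, like the real helper
    }

    int main() {
      std::list<std::string> Storage;
      const char *Sym = createExternalSymbolNameSketch(Storage, "__hypothetical_stub");
      std::printf("%s\n", Sym);
      return 0;
    }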
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFunction::dump() const {
print(dbgs());
@@ -593,10 +628,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
BV.set(*CSR);
// Saved CSRs are not pristine.
- const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I)
- BV.reset(I->getReg());
+ for (auto &I : getCalleeSavedInfo())
+ for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
+ BV.reset(*S);
return BV;
}
@@ -801,42 +835,26 @@ Type *MachineConstantPoolEntry::getType() const {
return Val.ConstVal->getType();
}
-
-unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+bool MachineConstantPoolEntry::needsRelocation() const {
if (isMachineConstantPoolEntry())
- return Val.MachineCPVal->getRelocationInfo();
- return Val.ConstVal->getRelocationInfo();
+ return true;
+ return Val.ConstVal->needsRelocation();
}
SectionKind
MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
- SectionKind Kind;
- switch (getRelocationInfo()) {
+ if (needsRelocation())
+ return SectionKind::getReadOnlyWithRel();
+ switch (DL->getTypeAllocSize(getType())) {
+ case 4:
+ return SectionKind::getMergeableConst4();
+ case 8:
+ return SectionKind::getMergeableConst8();
+ case 16:
+ return SectionKind::getMergeableConst16();
default:
- llvm_unreachable("Unknown section kind");
- case Constant::GlobalRelocations:
- Kind = SectionKind::getReadOnlyWithRel();
- break;
- case Constant::LocalRelocation:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case Constant::NoRelocation:
- switch (DL->getTypeAllocSize(getType())) {
- case 4:
- Kind = SectionKind::getMergeableConst4();
- break;
- case 8:
- Kind = SectionKind::getMergeableConst8();
- break;
- case 16:
- Kind = SectionKind::getMergeableConst16();
- break;
- default:
- Kind = SectionKind::getReadOnly();
- break;
- }
+ return SectionKind::getReadOnly();
}
- return Kind;
}
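The rewritten getSectionKind boils down to a small decision table; the sketch below mirrors it with a plain enum standing in for llvm::SectionKind (assumed names, illustration only).

    #include <cstdio>

    enum class Kind { ReadOnlyWithRel, MergeableConst4, MergeableConst8,
                      MergeableConst16, ReadOnly };

    Kind sectionKindSketch(bool NeedsReloc, unsigned AllocSize) {
      if (NeedsReloc)
        return Kind::ReadOnlyWithRel;
      switch (AllocSize) {
      case 4:  return Kind::MergeableConst4;
      case 8:  return Kind::MergeableConst8;
      case 16: return Kind::MergeableConst16;
      default: return Kind::ReadOnly;
      }
    }

    int main() {
      std::printf("%d\n", (int)sectionKindSketch(false, 8)); // MergeableConst8
      return 0;
    }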
MachineConstantPool::~MachineConstantPool() {
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index aaf06a7..05463fc 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -13,11 +13,14 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
@@ -49,13 +52,16 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
// passes explicitly. This does not include setPreservesCFG,
// because CodeGen overloads that to mean preserving the MachineBasicBlock
// CFG in addition to the LLVM IR CFG.
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<IVUsers>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index fdc4226..1eb2edc 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -43,6 +44,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+static cl::opt<bool> PrintWholeRegMask(
+ "print-whole-regmask",
+ cl::desc("Print the full contents of regmask operands in IR dumps"),
+ cl::init(true), cl::Hidden);
+
//===----------------------------------------------------------------------===//
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
@@ -407,9 +413,26 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
- case MachineOperand::MO_RegisterMask:
- OS << "<regmask>";
+ case MachineOperand::MO_RegisterMask: {
+ unsigned NumRegsInMask = 0;
+ unsigned NumRegsEmitted = 0;
+ OS << "<regmask";
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ unsigned MaskWord = i / 32;
+ unsigned MaskBit = i % 32;
+ if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+ if (PrintWholeRegMask || NumRegsEmitted <= 10) {
+ OS << " " << PrintReg(i, TRI);
+ NumRegsEmitted++;
+ }
+ NumRegsInMask++;
+ }
+ }
+ if (NumRegsEmitted != NumRegsInMask)
+ OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+ OS << ">";
break;
+ }
case MachineOperand::MO_RegisterLiveOut:
OS << "<regliveout>";
break;
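For reference, the new regmask printer above tests bit i of word i/32 for each register number and stops naming registers once the cap is hit unless -print-whole-regmask is set. A self-contained model of that indexing and truncation; the register count, names, and mask contents below are invented:

#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for a clobber mask: bit i set => register i is preserved.
static bool testMaskBit(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return Mask[Reg / 32] & (1u << (Reg % 32));
}

int main() {
  const unsigned NumRegs = 100;              // hypothetical register count
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
  for (unsigned R = 0; R < NumRegs; R += 3)  // mark every third register
    Mask[R / 32] |= 1u << (R % 32);

  const bool PrintWhole = false;             // mirrors -print-whole-regmask=0
  unsigned InMask = 0, Emitted = 0;
  std::cout << "<regmask";
  for (unsigned R = 0; R < NumRegs; ++R) {
    if (!testMaskBit(Mask, R))
      continue;
    if (PrintWhole || Emitted <= 10) {       // same cap as the patch
      std::cout << " %r" << R;
      ++Emitted;
    }
    ++InMask;
  }
  if (Emitted != InMask)
    std::cout << " and " << (InMask - Emitted) << " more...";
  std::cout << ">\n";
}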
@@ -443,26 +466,28 @@ unsigned MachinePointerInfo::getAddrSpace() const {
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
-MachinePointerInfo MachinePointerInfo::getConstantPool() {
- return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getConstantPool());
}
/// getFixedStack - Return a MachinePointerInfo record that refers to the
/// the specified FrameIndex.
-MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
- return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+ int FI, int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
}
-MachinePointerInfo MachinePointerInfo::getJumpTable() {
- return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getJumpTable());
}
-MachinePointerInfo MachinePointerInfo::getGOT() {
- return MachinePointerInfo(PseudoSourceValue::getGOT());
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getGOT());
}
-MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
- return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+ int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
}
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
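The hunk above retires the global PseudoSourceValue singletons in favour of objects owned by the MachineFunction's PSV manager. A toy analogue of that ownership change, assuming nothing about the real manager beyond the getters the diff shows; every type and member below is a stand-in:

#include <iostream>
#include <memory>

struct PseudoValue { const char *Kind; };   // invented stand-in

class PSVManager {                          // one instance per "function"
  std::unique_ptr<PseudoValue> ConstantPool, GOT;
public:
  const PseudoValue *getConstantPool() {
    if (!ConstantPool) ConstantPool.reset(new PseudoValue{"ConstantPool"});
    return ConstantPool.get();
  }
  const PseudoValue *getGOT() {
    if (!GOT) GOT.reset(new PseudoValue{"GOT"});
    return GOT.get();
  }
};

struct Function { PSVManager PSVs; };

int main() {
  Function F1, F2;
  // Each function now owns its pseudo values instead of sharing globals.
  std::cout << (F1.PSVs.getGOT() == F1.PSVs.getGOT()) << "\n";  // 1: cached per function
  std::cout << (F1.PSVs.getGOT() == F2.PSVs.getGOT()) << "\n";  // 0: no global singleton
}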
@@ -606,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
- for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
+ ++ImpDefs)
addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
+ for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses;
+ ++ImpUses)
addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -841,7 +868,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
- for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
+ for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
if (Type == AnyInBundle)
return true;
@@ -865,13 +892,13 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
if (isBundle()) {
// Both instructions are bundles, compare MIs inside the bundle.
- MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator I1 = getIterator();
MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
- MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator I2 = Other->getIterator();
MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
while (++I1 != E1 && I1->isInsideBundle()) {
++I2;
- if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check))
return false;
}
}
@@ -976,7 +1003,7 @@ unsigned MachineInstr::getNumExplicitOperands() const {
void MachineInstr::bundleWithPred() {
assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
setFlag(BundledPred);
- MachineBasicBlock::instr_iterator Pred = this;
+ MachineBasicBlock::instr_iterator Pred = getIterator();
--Pred;
assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
Pred->setFlag(BundledSucc);
@@ -985,7 +1012,7 @@ void MachineInstr::bundleWithPred() {
void MachineInstr::bundleWithSucc() {
assert(!isBundledWithSucc() && "MI is already bundled with its successor");
setFlag(BundledSucc);
- MachineBasicBlock::instr_iterator Succ = this;
+ MachineBasicBlock::instr_iterator Succ = getIterator();
++Succ;
assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
Succ->setFlag(BundledPred);
@@ -994,7 +1021,7 @@ void MachineInstr::bundleWithSucc() {
void MachineInstr::unbundleFromPred() {
assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
clearFlag(BundledPred);
- MachineBasicBlock::instr_iterator Pred = this;
+ MachineBasicBlock::instr_iterator Pred = getIterator();
--Pred;
assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
Pred->clearFlag(BundledSucc);
@@ -1003,7 +1030,7 @@ void MachineInstr::unbundleFromPred() {
void MachineInstr::unbundleFromSucc() {
assert(isBundledWithSucc() && "MI isn't bundled with its successor");
clearFlag(BundledSucc);
- MachineBasicBlock::instr_iterator Succ = this;
+ MachineBasicBlock::instr_iterator Succ = getIterator();
++Succ;
assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
Succ->clearFlag(BundledPred);
@@ -1139,7 +1166,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
/// Return the number of instructions inside the MI bundle, not counting the
/// header instruction.
unsigned MachineInstr::getBundleSize() const {
- MachineBasicBlock::const_instr_iterator I = this;
+ MachineBasicBlock::const_instr_iterator I = getIterator();
unsigned Size = 0;
while (I->isBundledWithSucc())
++Size, ++I;
@@ -1501,6 +1528,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
return false;
}
+bool MachineInstr::isLoadFoldBarrier() const {
+ return mayStore() || isCall() || hasUnmodeledSideEffects();
+}
+
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
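The new MachineInstr::isLoadFoldBarrier() bundles three existing queries into one predicate. A self-contained sketch of how a scan over a block might use such a predicate; the instruction model and the canFoldAcross helper are invented for illustration:

#include <iostream>
#include <vector>

struct Instr {
  bool MayStore = false, IsCall = false, HasSideEffects = false;
  bool isLoadFoldBarrier() const {          // same condition as the patch
    return MayStore || IsCall || HasSideEffects;
  }
};

// Returns true if a load at index From could be folded into a use at index To
// without crossing a barrier (endpoints excluded).
static bool canFoldAcross(const std::vector<Instr> &Block, size_t From, size_t To) {
  for (size_t I = From + 1; I < To; ++I)
    if (Block[I].isLoadFoldBarrier())
      return false;
  return true;
}

int main() {
  std::vector<Instr> Block(5);
  Block[2].IsCall = true;                   // a call in the middle blocks folding
  std::cout << canFoldAcross(Block, 0, 4) << "\n";  // 0
  std::cout << canFoldAcross(Block, 3, 4) << "\n";  // 1
}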
@@ -1615,7 +1646,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
FirstOp = false;
}
-
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -1706,13 +1736,16 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
bool HaveSemi = false;
- const unsigned PrintableFlags = FrameSetup;
+ const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " flags: ";
if (Flags & FrameSetup)
OS << "FrameSetup";
+
+ if (Flags & FrameDestroy)
+ OS << "FrameDestroy";
}
if (!memoperands_empty()) {
@@ -1755,7 +1788,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
DebugLoc InlinedAtDL(InlinedAt);
if (InlinedAtDL && MF) {
OS << " inlined @[ ";
- InlinedAtDL.print(OS);
+ InlinedAtDL.print(OS);
OS << " ]";
}
}
@@ -1902,11 +1935,11 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) {
}
}
-void MachineInstr::addRegisterDefReadUndef(unsigned Reg) {
+void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
continue;
- MO.setIsUndef();
+ MO.setIsUndef(IsUndef);
}
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index cd820ee..3eaf4c5 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -293,15 +293,17 @@ MachineOperandIteratorBase::PhysRegInfo
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
- PhysRegInfo PRI = {false, false, false, false, false, false};
+ PhysRegInfo PRI = {false, false, false, false, false, false, false};
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
for (; isValid(); ++*this) {
MachineOperand &MO = deref();
- if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
- PRI.Clobbers = true; // Regmask clobbers Reg.
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
+ PRI.Clobbered = true;
+ continue;
+ }
if (!MO.isReg())
continue;
@@ -310,33 +312,28 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
continue;
- bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
- bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
-
- if (IsRegOrSuperReg && MO.readsReg()) {
- // Reg or a super-reg is read, and perhaps killed also.
- PRI.Reads = true;
- PRI.Kills = MO.isKill();
- }
-
- if (IsRegOrOverlapping && MO.readsReg()) {
- PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
- }
-
- if (!MO.isDef())
+ if (!TRI->regsOverlap(MOReg, Reg))
continue;
- if (IsRegOrSuperReg) {
- PRI.Defines = true; // Reg or a super-register is defined.
+ bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
+ if (MO.readsReg()) {
+ PRI.Read = true;
+ if (Covered) {
+ PRI.FullyRead = true;
+ if (MO.isKill())
+ PRI.Killed = true;
+ }
+ } else if (MO.isDef()) {
+ PRI.Defined = true;
+ if (Covered)
+ PRI.FullyDefined = true;
if (!MO.isDead())
AllDefsDead = false;
}
- if (IsRegOrOverlapping)
- PRI.Clobbers = true; // Reg or an overlapping reg is defined.
}
- if (AllDefsDead && PRI.Defines)
- PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+ if (AllDefsDead && PRI.FullyDefined)
+ PRI.DeadDef = true;
return PRI;
}
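The reworked analyzePhysReg() above classifies each operand against the tracked register into the renamed PhysRegInfo flags (Clobbered, Read, FullyRead, Killed, Defined, FullyDefined, DeadDef). A simplified standalone classifier with the same flag logic; the operand model and the overlap/cover tests are stand-ins:

#include <iostream>
#include <vector>

struct Operand {
  bool IsRegMask = false, ClobbersReg = false;
  bool IsReg = false, Reads = false, IsDef = false, IsKill = false, IsDead = false;
  bool Overlaps = false, Covers = false;    // relation to the tracked register
};

struct PhysRegInfo {
  bool Clobbered = false, Defined = false, FullyDefined = false;
  bool Read = false, FullyRead = false, Killed = false, DeadDef = false;
};

static PhysRegInfo analyze(const std::vector<Operand> &Ops) {
  PhysRegInfo PRI;
  bool AllDefsDead = true;
  for (const Operand &MO : Ops) {
    if (MO.IsRegMask && MO.ClobbersReg) { PRI.Clobbered = true; continue; }
    if (!MO.IsReg || !MO.Overlaps)
      continue;
    if (MO.Reads) {
      PRI.Read = true;
      if (MO.Covers) { PRI.FullyRead = true; if (MO.IsKill) PRI.Killed = true; }
    } else if (MO.IsDef) {
      PRI.Defined = true;
      if (MO.Covers) PRI.FullyDefined = true;
      if (!MO.IsDead) AllDefsDead = false;
    }
  }
  if (AllDefsDead && PRI.FullyDefined)
    PRI.DeadDef = true;
  return PRI;
}

int main() {
  Operand Def;                               // a covering def whose result is dead
  Def.IsReg = Def.IsDef = Def.IsDead = Def.Overlaps = Def.Covers = true;
  PhysRegInfo PRI = analyze({Def});
  std::cout << PRI.FullyDefined << PRI.DeadDef << "\n";   // prints "11"
}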
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index e9ea5ed..a8368e9 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -138,7 +138,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -153,7 +153,7 @@ namespace {
}
private:
- /// CandidateInfo - Keep track of information about hoisting candidates.
+ /// Keep track of information about hoisting candidates.
struct CandidateInfo {
MachineInstr *MI;
unsigned Def;
@@ -162,149 +162,76 @@ namespace {
: MI(mi), Def(def), FI(fi) {}
};
- /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
- /// invariants out to the preheader.
void HoistRegionPostRA();
- /// HoistPostRA - When an instruction is found to only use loop invariant
- /// operands that is safe to hoist, this instruction is called to do the
- /// dirty work.
void HoistPostRA(MachineInstr *MI, unsigned Def);
- /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
- /// gather register def and frame object update information.
- void ProcessMI(MachineInstr *MI,
- BitVector &PhysRegDefs,
- BitVector &PhysRegClobbers,
- SmallSet<int, 32> &StoredFIs,
+ void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates);
- /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
- /// current loop.
void AddToLiveIns(unsigned Reg);
- /// IsLICMCandidate - Returns true if the instruction may be a suitable
- /// candidate for LICM. e.g. If the instruction is a call, then it's
- /// obviously not safe to hoist it.
bool IsLICMCandidate(MachineInstr &I);
- /// IsLoopInvariantInst - Returns true if the instruction is loop
- /// invariant. I.e., all virtual register operands are defined outside of
- /// the loop, physical registers aren't accessed (explicitly or implicitly),
- /// and the instruction is hoistable.
- ///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasLoopPHIUse - Return true if the specified instruction is used by any
- /// phi node in the current loop.
bool HasLoopPHIUse(const MachineInstr *MI) const;
- /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
- /// and an use in the current loop, return true if the target considered
- /// it 'high'.
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
unsigned Reg) const;
bool IsCheapInstruction(MachineInstr &MI) const;
- /// CanCauseHighRegPressure - Visit BBs from header to current BB,
- /// check if hoisting an instruction of the given cost matrix can cause high
- /// register pressure.
bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
bool Cheap);
- /// UpdateBackTraceRegPressure - Traverse the back trace from header to
- /// the current block and update their register pressures to reflect the
- /// effect of hoisting MI from the current block to the preheader.
void UpdateBackTraceRegPressure(const MachineInstr *MI);
- /// IsProfitableToHoist - Return true if it is potentially profitable to
- /// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
- /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
- /// If not then a load from this mbb may not be safe to hoist.
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
- /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
- /// dominator tree node if its a leaf or all of its children are done. Walk
- /// up the dominator tree to destroy ancestors which are now done.
- void ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
-
- /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
- /// blocks dominated by the specified header block, and that are in the
- /// current loop) in depth first order w.r.t the DominatorTree. This allows
- /// us to visit definitions before uses, allowing us to hoist a loop body in
- /// one pass without iteration.
- ///
+ void ExitScopeIfDone(
+ MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
- /// SinkIntoLoop - Sink instructions into loops if profitable. This
- /// especially tries to prevent register spills caused by register pressure
- /// if there is little to no overhead moving instructions into loops.
void SinkIntoLoop();
- /// InitRegPressure - Find all virtual register references that are liveout
- /// of the preheader to initialize the starting "register pressure". Note
- /// this does not count live through (livein but not used) registers.
void InitRegPressure(MachineBasicBlock *BB);
- /// calcRegisterCost - Calculate the additional register pressure that the
- /// registers used in MI cause.
- ///
- /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
- /// figure out which usages are live-ins.
- /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
bool ConsiderSeen,
bool ConsiderUnseenAsDef);
- /// UpdateRegPressure - Update estimate of register pressure after the
- /// specified instruction.
void UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef = false);
- /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
- /// the load itself could be hoisted. Return the unfolded and hoistable
- /// load, or null if the load couldn't be unfolded or if it wouldn't
- /// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
- /// LookForDuplicate - Find an instruction amount PrevMIs that is a
- /// duplicate of MI. Return this instruction if it's found.
- const MachineInstr *LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs);
+ const MachineInstr *
+ LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr *> &PrevMIs);
- /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
- /// the preheader that compute the same value. If it's found, do a RAU on
- /// with the definition of the existing instruction rather than hoisting
- /// the instruction to the preheader.
- bool EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+ bool EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
- /// MayCSE - Return true if the given instruction will be CSE'd if it's
- /// hoisted out of the loop.
bool MayCSE(MachineInstr *MI);
- /// Hoist - When an instruction is found to only use loop invariant operands
- /// that is safe to hoist, this instruction is called to do the dirty work.
- /// It returns true if the instruction is hoisted.
bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
- /// InitCSEMap - Initialize the CSE map with instructions that are in the
- /// current loop preheader that may become duplicates of instructions that
- /// are hoisted out of the loop.
void InitCSEMap(MachineBasicBlock *BB);
- /// getCurPreheader - Get the preheader for the current loop, splitting
- /// a critical edge if needed.
MachineBasicBlock *getCurPreheader();
};
} // end anonymous namespace
@@ -315,12 +242,11 @@ INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
-/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
-/// loop that has a unique predecessor.
+/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
// Check whether this loop even has a unique predecessor.
if (!CurLoop->getLoopPredecessor())
@@ -367,7 +293,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// Get our Loop information...
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
while (!Worklist.empty()) {
@@ -402,9 +328,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-/// InstructionStoresToFI - Return true if instruction stores to the
-/// specified frame.
+/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ // If we lost memory operands, conservatively assume that the instruction
+ // writes to all slots.
+ if (MI->memoperands_empty())
+ return true;
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end(); o != oe; ++o) {
if (!(*o)->isStore() || !(*o)->getPseudoValue())
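This hunk, like the matching one added to mayLoadFromGOTOrConstantPool further down, treats an instruction that has lost its memory operands as touching every slot. A tiny standalone illustration of that conservative default; the MemOperand model and the helper name are invented:

#include <iostream>
#include <vector>

struct MemOperand { bool IsStore; int FrameIndex; };   // invented model

// Mirrors InstructionStoresToFI: with no memory operands nothing can be
// proven, so conservatively report a store to the queried slot.
static bool storesToFrameIndex(const std::vector<MemOperand> &MemOps, int FI) {
  if (MemOps.empty())
    return true;                                        // lost info: assume yes
  for (const MemOperand &MO : MemOps)
    if (MO.IsStore && MO.FrameIndex == FI)
      return true;
  return false;
}

int main() {
  std::cout << storesToFrameIndex({}, 3) << "\n";            // 1
  std::cout << storesToFrameIndex({{false, 3}}, 3) << "\n";  // 0
  std::cout << storesToFrameIndex({{true, 3}}, 3) << "\n";   // 1
}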
@@ -418,7 +347,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
return false;
}
-/// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+/// Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
BitVector &PhysRegDefs,
@@ -506,8 +435,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
}
-/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
-/// invariants out to the preheader.
+/// Walk the specified region of the CFG and hoist loop invariants out to the
+/// preheader.
void MachineLICM::HoistRegionPostRA() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
@@ -529,15 +458,13 @@ void MachineLICM::HoistRegionPostRA() {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad()) continue;
+ if (ML && ML->getHeader()->isEHPad()) continue;
// Conservatively treat live-in's as an external def.
// FIXME: That means a reload that're reused in successor block(s) will not
// be LICM'ed.
- for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
- E = BB->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ for (const auto &LI : BB->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
PhysRegDefs.set(*AI);
}
@@ -601,8 +528,8 @@ void MachineLICM::HoistRegionPostRA() {
}
}
-/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
-/// loop, and make sure it is not killed by any instructions in the loop.
+/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
+/// sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -622,9 +549,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
}
}
-/// HoistPostRA - When an instruction is found to only use loop invariant
-/// operands that is safe to hoist, this instruction is called to do the
-/// dirty work.
+/// When an instruction is found to only use loop invariant operands that are
+/// safe to hoist, this instruction is called to do the dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
@@ -646,8 +572,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
Changed = true;
}
-// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
-// If not then a load from this mbb may not be safe to hoist.
+/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
+/// may not be safe to hoist.
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
@@ -679,9 +605,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
BackTrace.pop_back();
}
-/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
-/// dominator tree node if its a leaf or all of its children are done. Walk
-/// up the dominator tree to destroy ancestors which are now done.
+/// Destroy scope for the MBB that corresponds to the given dominator tree node
+/// if it's a leaf or all of its children are done. Walk up the dominator tree to
+/// destroy ancestors which are now done.
void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
@@ -701,11 +627,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
}
}
-/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
-/// blocks dominated by the specified header block, and that are in the
-/// current loop) in depth first order w.r.t the DominatorTree. This allows
-/// us to visit definitions before uses, allowing us to hoist a loop body in
-/// one pass without iteration.
+/// Walk the specified loop in the CFG (defined by all blocks dominated by the
+/// specified header block, and that are in the current loop) in depth first
+/// order w.r.t the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineBasicBlock *Preheader = getCurPreheader();
@@ -727,7 +652,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad())
+ if (ML && ML->getHeader()->isEHPad())
continue;
// If this subregion is not in the top level loop at all, exit.
@@ -786,6 +711,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
+/// Sink instructions into loops if profitable. This especially tries to prevent
+/// register spills caused by register pressure if there is little to no
+/// overhead moving instructions into loops.
void MachineLICM::SinkIntoLoop() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
@@ -796,8 +724,8 @@ void MachineLICM::SinkIntoLoop() {
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, e.g. such as a call has.
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
- Candidates.push_back(I);
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
+ Candidates.push_back(&*I);
}
for (MachineInstr *I : Candidates) {
@@ -837,9 +765,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
-/// InitRegPressure - Find all virtual register references that are liveout of
-/// the preheader to initialize the starting "register pressure". Note this
-/// does not count live through (livein but not used) registers.
+/// Find all virtual register references that are liveout of the preheader to
+/// initialize the starting "register pressure". Note this does not count live
+/// through (livein but not used) registers.
void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
std::fill(RegPressure.begin(), RegPressure.end(), 0);
@@ -858,8 +786,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}
-/// UpdateRegPressure - Update estimate of register pressure after the
-/// specified instruction.
+/// Update estimate of register pressure after the specified instruction.
void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef) {
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
@@ -872,6 +799,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
}
}
+/// Calculate the additional register pressure that the registers used in MI
+/// cause.
+///
+/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+/// figure out which usages are live-ins.
+/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
bool ConsiderUnseenAsDef) {
@@ -915,23 +848,28 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
return Cost;
}
-/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
-/// loads from global offset table or constant pool.
-static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+/// Return true if this machine instruction loads from global offset table or
+/// constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert (MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost memory operands, conservatively assume that the instruction
+ // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
E = MI.memoperands_end(); I != E; ++I) {
if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
- if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ if (PSV->isGOT() || PSV->isConstantPool())
return true;
}
}
return false;
}
-/// IsLICMCandidate - Returns true if the instruction may be a suitable
-/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
-/// not safe to hoist it.
+/// Returns true if the instruction may be a suitable candidate for LICM.
+/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
@@ -944,16 +882,16 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// from constant memory are not safe to speculate all the time, for example
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
!IsGuaranteedToExecute(I.getParent()))
return false;
return true;
}
-/// IsLoopInvariantInst - Returns true if the instruction is loop
-/// invariant. I.e., all virtual register operands are defined outside of the
-/// loop, physical registers aren't accessed explicitly, and there are no side
+/// Returns true if the instruction is loop invariant.
+/// I.e., all virtual register operands are defined outside of the loop,
+/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
@@ -1007,8 +945,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
-/// HasLoopPHIUse - Return true if the specified instruction is used by a
-/// phi node and hoisting it could cause a copy to be inserted.
+/// Return true if the specified instruction is used by a phi node and hoisting
+/// it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
@@ -1042,9 +980,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
return false;
}
-/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
-/// and an use in the current loop, return true if the target considered
-/// it 'high'.
+/// Compute operand latency between a def of 'Reg' and a use in the current
+/// loop, return true if the target considered it high.
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
unsigned DefIdx, unsigned Reg) const {
if (MRI->use_nodbg_empty(Reg))
@@ -1074,8 +1011,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
return false;
}
-/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
-/// the operand latency between its def and a use is one or less.
+/// Return true if the instruction is marked "cheap" or the operand latency
+/// between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
return true;
@@ -1099,9 +1036,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
return isCheap;
}
-/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
-/// if hoisting an instruction of the given cost matrix can cause high
-/// register pressure.
+/// Visit BBs from header to current BB, check if hoisting an instruction of the
+/// given cost matrix can cause high register pressure.
bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
bool CheapInstr) {
for (const auto &RPIdAndCost : Cost) {
@@ -1124,9 +1060,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
return false;
}
-/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
-/// current block and update their register pressures to reflect the effect
-/// of hoisting MI from the current block to the preheader.
+/// Traverse the back trace from header to the current block and update their
+/// register pressures to reflect the effect of hoisting MI from the current
+/// block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
// First compute the 'cost' of the instruction, i.e. its contribution
// to register pressure.
@@ -1139,8 +1075,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
RP[RPIdAndCost.first] += RPIdAndCost.second;
}
-/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
-/// the given loop invariant.
+/// Return true if it is potentially profitable to hoist the given loop
+/// invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
@@ -1230,6 +1166,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
return true;
}
+/// Unfold a load from the given machineinstr if the load itself could be
+/// hoisted. Return the unfolded and hoistable load, or null if the load
+/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
@@ -1287,6 +1226,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
return NewMIs[0];
}
+/// Initialize the CSE map with instructions that are in the current loop
+/// preheader that may become duplicates of instructions that are hoisted
+/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
@@ -1295,6 +1237,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}
+/// Find an instruction among PrevMIs that is a duplicate of MI.
+/// Return this instruction if it's found.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
@@ -1306,6 +1250,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
return nullptr;
}
+/// Given a LICM'ed instruction, look for an instruction on the preheader that
+/// computes the same value. If it's found, replace its uses with the definition of
+/// the existing instruction rather than hoisting the instruction to the
+/// preheader.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
@@ -1363,8 +1311,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
return false;
}
-/// MayCSE - Return true if the given instruction will be CSE'd if it's
-/// hoisted out of the loop.
+/// Return true if the given instruction will be CSE'd if it's hoisted out of
+/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
@@ -1377,9 +1325,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) {
return LookForDuplicate(MI, CI->second) != nullptr;
}
-/// Hoist - When an instruction is found to use only loop invariant operands
+/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
-///
+/// It returns true if the instruction is hoisted.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
@@ -1441,6 +1389,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
return true;
}
+/// Get the preheader for the current loop, splitting a critical edge if needed.
MachineBasicBlock *MachineLICM::getCurPreheader() {
// Determine the block to which to hoist instructions. If we can't find a
// suitable loop predecessor, we can't do any hoisting.
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index ce6abdd..2f5c9e0 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -37,7 +37,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
- LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
+ LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
return false;
}
@@ -51,11 +51,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() {
MachineBasicBlock *TopMBB = getHeader();
MachineFunction::iterator Begin = TopMBB->getParent()->begin();
if (TopMBB != Begin) {
- MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
+ MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator());
while (contains(PriorMBB)) {
TopMBB = PriorMBB;
if (TopMBB == Begin) break;
- PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
+ PriorMBB = &*std::prev(TopMBB->getIterator());
}
}
return TopMBB;
@@ -65,11 +65,12 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
MachineBasicBlock *BotMBB = getHeader();
MachineFunction::iterator End = BotMBB->getParent()->end();
if (BotMBB != std::prev(End)) {
- MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB));
+ MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator());
while (contains(NextMBB)) {
BotMBB = NextMBB;
- if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break;
- NextMBB = std::next(MachineFunction::iterator(BotMBB));
+ if (BotMBB == &*std::next(BotMBB->getIterator()))
+ break;
+ NextMBB = &*std::next(BotMBB->getIterator());
}
}
return BotMBB;
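These MachineLoopInfo hunks replace implicit pointer-to-iterator conversions with explicit getIterator() calls and turn iterators back into block pointers with '&*'. The same idiom with a plain std::list, shown only to illustrate the pattern once the implicit conversion is gone:

#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> Blocks = {10, 20, 30};

  std::list<int>::iterator It = std::next(Blocks.begin());  // the "20" node
  int *Elem = &*It;                        // iterator -> element pointer
  std::list<int>::iterator Prev = std::prev(It);

  std::cout << *Elem << " follows " << *Prev << "\n";       // 20 follows 10
}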
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 6a20624..1956a70 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,12 +9,12 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -35,7 +35,7 @@ char MachineModuleInfo::ID = 0;
MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
namespace llvm {
-class MMIAddrLabelMapCallbackPtr : CallbackVH {
+class MMIAddrLabelMapCallbackPtr final : CallbackVH {
MMIAddrLabelMap *Map;
public:
MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
@@ -209,9 +209,8 @@ bool MachineModuleInfo::doInitialization(Module &M) {
CurCallSite = 0;
CallsEHReturn = false;
CallsUnwindInit = false;
+ HasEHFunclets = false;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
- // Always emit some info, by default "no personality" info.
- Personalities.push_back(nullptr);
PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
TheModule = nullptr;
@@ -249,6 +248,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = false;
CallsUnwindInit = false;
+ HasEHFunclets = false;
VariableDbgInfos.clear();
}
@@ -314,32 +314,11 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
return LandingPadLabel;
}
-/// addPersonality - Provide the personality function for the exception
-/// information.
-void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
- const Function *Personality) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.Personality = Personality;
- addPersonality(Personality);
-}
-
void MachineModuleInfo::addPersonality(const Function *Personality) {
for (unsigned i = 0; i < Personalities.size(); ++i)
if (Personalities[i] == Personality)
return;
-
- // If this is the first personality we're adding go
- // ahead and add it at the beginning.
- if (!Personalities[0])
- Personalities[0] = Personality;
- else
- Personalities.push_back(Personality);
-}
-
-void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad,
- int State) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.WinEHState = State;
+ Personalities.push_back(Personality);
}
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
@@ -481,56 +460,3 @@ try_next:;
FilterIds.push_back(0); // terminator
return FilterID;
}
-
-/// getPersonality - Return the personality function for the current function.
-const Function *MachineModuleInfo::getPersonality() const {
- for (const LandingPadInfo &LPI : LandingPads)
- if (LPI.Personality)
- return LPI.Personality;
- return nullptr;
-}
-
-EHPersonality MachineModuleInfo::getPersonalityType() {
- if (PersonalityTypeCache == EHPersonality::Unknown) {
- if (const Function *F = getPersonality())
- PersonalityTypeCache = classifyEHPersonality(F);
- }
- return PersonalityTypeCache;
-}
-
-/// getPersonalityIndex - Return unique index for current personality
-/// function. NULL/first personality function should always get zero index.
-unsigned MachineModuleInfo::getPersonalityIndex() const {
- const Function* Personality = nullptr;
-
- // Scan landing pads. If there is at least one non-NULL personality - use it.
- for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
- if (LandingPads[i].Personality) {
- Personality = LandingPads[i].Personality;
- break;
- }
-
- for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
- if (Personalities[i] == Personality)
- return i;
- }
-
- // This will happen if the current personality function is
- // in the zero index.
- return 0;
-}
-
-const Function *MachineModuleInfo::getWinEHParent(const Function *F) const {
- StringRef WinEHParentName =
- F->getFnAttribute("wineh-parent").getValueAsString();
- if (WinEHParentName.empty() || WinEHParentName == F->getName())
- return F;
- return F->getParent()->getFunction(WinEHParentName);
-}
-
-WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) {
- auto &Ptr = FuncInfoMap[getWinEHParent(F)];
- if (!Ptr)
- Ptr.reset(new WinEHFuncInfo);
- return *Ptr;
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index e883ce5..03c82f4 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -27,13 +27,11 @@ void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
: MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true),
TracksSubRegLiveness(false) {
+ unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
- UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
-
- // Create the physreg use/def lists.
- PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr);
+ UsedPhysRegMask.resize(NumRegs);
+ PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -117,6 +115,8 @@ void MachineRegisterInfo::clearVirtRegs() {
}
#endif
VRegInfo.clear();
+ for (auto &I : LiveIns)
+ I.second = 0;
}
void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
@@ -394,8 +394,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const
-{
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
// Lane masks are only defined for vregs.
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const TargetRegisterClass &TRC = *getRegClass(Reg);
@@ -468,11 +467,8 @@ static bool isNoReturnDef(const MachineOperand &MO) {
if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
return false;
const Function *Called = getCalledFunction(MI);
- if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn)
- || !Called->hasFnAttribute(Attribute::NoUnwind))
- return false;
-
- return true;
+ return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) ||
+ !Called->hasFnAttribute(Attribute::NoUnwind));
}
bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
@@ -488,3 +484,15 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
}
return false;
}
+
+bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid();
+ ++AliasReg) {
+ if (!reg_nodbg_empty(*AliasReg))
+ return true;
+ }
+ return false;
+}
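The new isPhysRegUsed() first consults the accumulated regmask-derived use set and then scans the use/def lists of the register and all of its aliases. A compact standalone model of that two-step query; the alias table, register count, and field names below are made up:

#include <bitset>
#include <iostream>
#include <vector>

constexpr unsigned NumRegs = 8;

struct RegInfo {
  std::bitset<NumRegs> UsedPhysRegMask;            // uses implied by regmasks
  std::vector<std::vector<unsigned>> Aliases;      // Aliases[R] includes R itself
  std::vector<bool> HasUseDefList;                 // any operands on this reg?

  bool isPhysRegUsed(unsigned Reg) const {
    if (UsedPhysRegMask.test(Reg))
      return true;
    for (unsigned A : Aliases[Reg])                // the reg and overlapping regs
      if (HasUseDefList[A])
        return true;
    return false;
  }
};

int main() {
  RegInfo RI;
  RI.Aliases = {{0}, {1, 2}, {2, 1}, {3}, {4}, {5}, {6}, {7}};
  RI.HasUseDefList.assign(NumRegs, false);
  RI.HasUseDefList[2] = true;                      // a sub/super-register is used

  std::cout << RI.isPhysRegUsed(1) << "\n";        // 1, via alias register 2
  std::cout << RI.isPhysRegUsed(3) << "\n";        // 0
}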
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index a48e54c..bcee15c 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden,
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
+/// In some situations a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+ cl::desc("Hide nodes with more predecessor/successor than cutoff"));
+
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
@@ -106,7 +111,7 @@ public:
void print(raw_ostream &O, const Module* = nullptr) const override;
protected:
- void scheduleRegions(ScheduleDAGInstrs &Scheduler);
+ void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};
/// MachineScheduler runs after coalescing and before register allocation.
@@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
@@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequiredID(MachineDominatorsID);
AU.addRequired<MachineLoopInfo>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
@@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
} else if (!mf.getSubtarget().enableMachineScheduler())
return false;
- DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+ DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
PassConfig = &getAnalysis<TargetPassConfig>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LIS = &getAnalysis<LiveIntervals>();
@@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
- scheduleRegions(*Scheduler);
+ scheduleRegions(*Scheduler, false);
DEBUG(LIS->dump());
if (VerifyScheduling)
@@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
- scheduleRegions(*Scheduler);
+ scheduleRegions(*Scheduler, true);
if (VerifyScheduling)
MF->verify(this, "After post machine scheduling.");
@@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB,
MachineFunction *MF,
- const TargetInstrInfo *TII,
- bool IsPostRA) {
+ const TargetInstrInfo *TII) {
return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
}
/// Main driver for both MachineScheduler and PostMachineScheduler.
-void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
+ bool FixKillFlags) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- bool IsPostRA = Scheduler.isPostRA();
// Visit all machine basic blocks.
//
@@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
- Scheduler.startBlock(MBB);
+ Scheduler.startBlock(&*MBB);
#ifndef NDEBUG
if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
@@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end() ||
- isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
// Count the boundary instruction.
--RemainingInstrs;
@@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingInstrs) {
- if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
+ if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
break;
if (!I->isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == std::prev(RegionEnd)) {
@@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
Scheduler.exitRegion();
continue;
}
- DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
- << "MI Scheduling **********\n");
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(dbgs() << MF->getName()
<< ":BB#" << MBB->getNumber() << " " << MBB->getName()
<< "\n From: " << *I << " To: ";
@@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
}
assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler.finishBlock();
- if (Scheduler.isPostRA()) {
- // FIXME: Ideally, no further passes should rely on kill flags. However,
- // thumb2 size reduction is currently an exception.
- Scheduler.fixupKills(MBB);
- }
+ // FIXME: Ideally, no further passes should rely on kill flags. However,
+ // thumb2 size reduction is currently an exception, so the PostMIScheduler
+ // needs to do this.
+ if (FixKillFlags)
+ Scheduler.fixupKills(&*MBB);
}
Scheduler.finalizeSchedule();
}
@@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
LLVM_DUMP_METHOD
void ReadyQueue::dump() {
- dbgs() << Name << ": ";
+ dbgs() << "Queue " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
@@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() {
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
+
// Build the DAG.
buildSchedGraph(AA);
@@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() {
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
@@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() {
updatePressureDiffs(LiveUses);
}
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
// Cache the list of excess pressure sets in this region. This will also track
@@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
}
// RegisterPressureTracker guarantees that readsReg is true for LiveUses.
assert(VNI && "No live value at use.");
- for (VReg2UseMap::iterator
- UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
- SUnit *SU = UI->SU;
- DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr());
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
// If this use comes before the reaching def, it cannot be a last use, so
// descrease its pressure change.
if (!SU->isScheduled && SU != &ExitSU) {
LiveQueryResult LRQ
= LI.Query(LIS->getInstructionIndex(SU->getInstr()));
- if (LRQ.valueIn() == VNI)
- getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
}
}
}
@@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
-/// so that it can be easilly extended by experimental schedulers. Generally,
+/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
Topo.InitDAGTopologicalSorting();
@@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << '\n';
+ }
+ );
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() {
}
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
@@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned LiveOutHeight = DefSU->getHeight();
unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
// Visit all local users of the vreg def.
- for (VReg2UseMap::iterator
- UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
- if (UI->SU == &ExitSU)
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ if (SU == &ExitSU)
continue;
// Only consider uses of the phi.
LiveQueryResult LRQ =
- LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
+ LI.Query(LIS->getInstructionIndex(SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
@@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// overestimate in strange cases. This allows cyclic latency to be
// estimated as the minimum slack of the vreg's depth or height.
unsigned CyclicLatency = 0;
- if (LiveOutDepth > UI->SU->getDepth())
- CyclicLatency = LiveOutDepth - UI->SU->getDepth();
+ if (LiveOutDepth > SU->getDepth())
+ CyclicLatency = LiveOutDepth - SU->getDepth();
- unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
+ unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
if (LiveInHeight > LiveOutHeight) {
if (LiveInHeight - LiveOutHeight < CyclicLatency)
CyclicLatency = LiveInHeight - LiveOutHeight;
@@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
CyclicLatency = 0;
DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
- << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");
if (CyclicLatency > MaxCyclicLatency)
MaxCyclicLatency = CyclicLatency;
}
@@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
// Update top scheduled pressure.
TopRPTracker.advance();
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
}
@@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
SmallVector<unsigned, 8> LiveUses;
BotRPTracker.recede(&LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ DEBUG(
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
updatePressureDiffs(LiveUses);
}
@@ -1349,25 +1397,49 @@ namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo *TII;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
public:
- MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+ MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+ : TII(TII), TRI(TRI) {}
void apply(ScheduleDAGMI *DAG) override;
};
} // anonymous
+/// Returns true if \p MI reads a register written by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+ const MachineInstr &Other) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Other.modifiesRegister(Reg, &TRI))
+ return true;
+ }
+ return false;
+}
+
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
// For now, assume targets can only fuse with the branch.
- MachineInstr *Branch = DAG->ExitSU.getInstr();
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
if (!Branch)
return;
- for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
- SUnit *SU = &DAG->SUnits[--Idx];
- if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ for (SUnit &SU : DAG->SUnits) {
+ // SUnits with successors can't be scheduled in front of the ExitSU.
+ if (!SU.Succs.empty())
+ continue;
+ // We only care if the node writes to a register that the branch reads.
+ MachineInstr *Pred = SU.getInstr();
+ if (!HasDataDep(TRI, *Branch, *Pred))
+ continue;
+
+ if (!TII.shouldScheduleAdjacent(Pred, Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
// scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
// of SU, we could create an artificial edge from the deepest root, but it
// hasn't been needed yet.
- bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
- DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
break;
}
}
@@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
Latency = Cand.SU->getDepth();
break;
}
- dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
<< ":" << P.getUnitInc() << " ";
@@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
}
+void GenericScheduler::dumpPolicy() {
+ dbgs() << "GenericScheduler RegionPolicy: "
+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
+ << "\n";
+}
+
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
/// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bounds on in-flight instructions as:
@@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP,
const PressureChange &CandP,
GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
- int TryRank = TryP.getPSetOrMax();
- int CandRank = CandP.getPSetOrMax();
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
// If both candidates affect the same set, go with the smallest increase.
- if (TryRank == CandRank) {
+ if (TryPSet == CandPSet) {
return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
Reason);
}
@@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP,
Reason)) {
return true;
}
+
+ int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
+ std::numeric_limits<int>::max();
+
+ int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
+ std::numeric_limits<int>::max();
+
// If the candidates are decreasing pressure, reverse priority.
if (TryP.getUnitInc() < 0)
std::swap(TryRank, CandRank);
@@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
}
}
DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
+ dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
<< TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
<< ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
@@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
- TryCand, Cand, RegExcess))
+ TryCand, Cand, RegExcess, TRI,
+ DAG->MF))
return;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
- TryCand, Cand, RegCritical))
+ TryCand, Cand, RegCritical, TRI,
+ DAG->MF))
return;
// For loops that are acyclic path limited, aggressively schedule for latency.
@@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
Cand.RPDelta.CurrentMax,
- TryCand, Cand, RegMax))
+ TryCand, Cand, RegMax, TRI,
+ DAG->MF))
return;
// Avoid critical resource consumption and balance the schedule.
@@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
- if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
- && tryLatency(TryCand, Cand, Zone)) {
+ if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
return;
}
@@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Bot NOCAND\n");
+ DEBUG(dbgs() << "Pick Bot ONLY1\n");
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Top NOCAND\n");
+ DEBUG(dbgs() << "Pick Top ONLY1\n");
return SU;
}
CandPolicy NoPolicy;
@@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
if (EnableLoadCluster && DAG->TII->enableClusterLoads())
DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
- DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
+ DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
}
@@ -3254,12 +3346,10 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
}
static bool isNodeHidden(const SUnit *Node) {
- return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
- }
-
- static bool hasNodeAddressLabel(const SUnit *Node,
- const ScheduleDAG *Graph) {
- return false;
+ if (ViewMISchedCutoff == 0)
+ return false;
+ return (Node->Preds.size() > ViewMISchedCutoff
+ || Node->Succs.size() > ViewMISchedCutoff);
}
/// If you want to override the dot attributes printed for a particular
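The reworked MacroFusion mutation above now asks two questions before clustering a node with the exit branch: HasDataDep() confirms the branch actually reads a register the candidate writes, and the target's shouldScheduleAdjacent() hook gets the final say. A minimal sketch of such a hook, assuming a hypothetical MyTarget backend whose CMPrr/Bcc opcodes are purely illustrative:

bool MyTargetInstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                               MachineInstr *Second) const {
  // MacroFusion::apply() has already established that Second (the branch)
  // reads a register defined by First; only agree to fuse a flag-setting
  // compare with the conditional branch that consumes it.
  if (!First || !Second)
    return false;
  return First->getOpcode() == MyTarget::CMPrr &&
         Second->getOpcode() == MyTarget::Bcc;
}

createGenericSchedLive() only installs the mutation when EnableMacroFusion is set, so a target that never fuses can keep the default implementation.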
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 1b9be50..5e6d619 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -87,7 +87,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -150,7 +150,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
@@ -268,7 +268,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
@@ -667,7 +667,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isEHPad())
return nullptr;
return SuccToSinkTo;
@@ -686,7 +686,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
if (!MI->isSafeToMove(AA, SawStore))
return false;
- // Convergent operations may only be moved to control equivalent locations.
+ // Convergent operations may not be made control-dependent on additional
+ // values.
if (MI->isConvergent())
return false;
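A minimal sketch of the AAResultsWrapperPass plumbing that the MachineSink hunks above migrate to, shown in an arbitrary machine pass; the pass name is illustrative, and AliasAnalysis here is the transitional typedef for AAResults:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
class MyMachinePass : public MachineFunctionPass {
  AliasAnalysis *AA = nullptr;
public:
  static char ID;
  MyMachinePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();  // was AU.addRequired<AliasAnalysis>()
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // The wrapper pass owns the AAResults aggregate; fetch it instead of
    // querying the retired AliasAnalysis analysis group.
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    return false;
  }
};
} // anonymous
char MyMachinePass::ID = 0;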
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index d9a6b684..f7edacd 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
- for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ for (unsigned Kill : Kills)
+ for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
RegUnits.erase(*Units);
// Second, live defs.
- for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
- unsigned DefOp = LiveDefOps[i];
+ for (unsigned DefOp : LiveDefOps) {
for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
@@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrDepths && "Missing depth info");
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
- for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
- const LiveInReg &LIR = TBI.LiveIns[i];
+ for (const LiveInReg &LIR : TBI.LiveIns) {
if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index ca35ec5..cdcd8eb 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -204,18 +204,19 @@ namespace {
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
+ template <typename T> void report(const char *msg, ilist_iterator<T> I) {
+ report(msg, &*I);
+ }
void report(const char *msg, const MachineFunction *MF);
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
- void report(const char *msg, const MachineFunction *MF,
- const LiveInterval &LI);
- void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveInterval &LI);
- void report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg, unsigned LaneMask);
- void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg, unsigned LaneMask);
+
+ void report_context(const LiveInterval &LI) const;
+ void report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const;
+ void report_context(const LiveRange::Segment &S) const;
+ void report_context(const VNInfo &VNI) const;
void verifyInlineAsm(const MachineInstr *MI);
@@ -233,9 +234,11 @@ namespace {
void verifyLiveRangeSegment(const LiveRange&,
const LiveRange::const_iterator I, unsigned,
unsigned);
- void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0);
+ void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0);
void verifyStackFrame();
+
+ void verifySlotIndexes() const;
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -273,6 +276,19 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
.runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
+void MachineVerifier::verifySlotIndexes() const {
+ if (Indexes == nullptr)
+ return;
+
+ // Ensure the IdxMBB list is sorted by slot indexes.
+ SlotIndex Last;
+ for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(),
+ E = Indexes->MBBIndexEnd(); I != E; ++I) {
+ assert(!Last.isValid() || I->first > Last);
+ Last = I->first;
+ }
+}
+
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
foundErrors = 0;
@@ -295,10 +311,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
}
+ verifySlotIndexes();
+
visitMachineFunctionBefore();
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
- visitMachineBasicBlockBefore(MFI);
+ visitMachineBasicBlockBefore(&*MFI);
// Keep track of the current bundle header.
const MachineInstr *CurBundle = nullptr;
// Do we expect the next instruction to be part of the same bundle?
@@ -306,7 +324,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
- if (MBBI->getParent() != MFI) {
+ if (MBBI->getParent() != &*MFI) {
report("Bad instruction parent pointer", MFI);
errs() << "Instruction: " << *MBBI;
continue;
@@ -315,20 +333,22 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
// Check for consistent bundle flags.
if (InBundle && !MBBI->isBundledWithPred())
report("Missing BundledPred flag, "
- "BundledSucc was set on predecessor", MBBI);
+ "BundledSucc was set on predecessor",
+ &*MBBI);
if (!InBundle && MBBI->isBundledWithPred())
report("BundledPred flag is set, "
- "but BundledSucc not set on predecessor", MBBI);
+ "but BundledSucc not set on predecessor",
+ &*MBBI);
// Is this a bundle header?
if (!MBBI->isInsideBundle()) {
if (CurBundle)
visitMachineBundleAfter(CurBundle);
- CurBundle = MBBI;
+ CurBundle = &*MBBI;
visitMachineBundleBefore(CurBundle);
} else if (!CurBundle)
report("No bundle header", MBBI);
- visitMachineInstrBefore(MBBI);
+ visitMachineInstrBefore(&*MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
const MachineInstr &MI = *MBBI;
const MachineOperand &Op = MI.getOperand(I);
@@ -341,7 +361,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineOperand(&Op, I);
}
- visitMachineInstrAfter(MBBI);
+ visitMachineInstrAfter(&*MBBI);
// Was this the last bundled instruction?
InBundle = MBBI->isBundledWithSucc();
@@ -350,7 +370,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBundleAfter(CurBundle);
if (InBundle)
report("BundledSucc flag set on last instruction in block", &MFI->back());
- visitMachineBasicBlockAfter(MFI);
+ visitMachineBasicBlockAfter(&*MFI);
}
visitMachineFunctionAfter();
@@ -375,7 +395,10 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
if (!foundErrors++) {
if (Banner)
errs() << "# " << Banner << '\n';
- MF->print(errs(), Indexes);
+ if (LiveInts != nullptr)
+ LiveInts->print(errs());
+ else
+ MF->print(errs(), Indexes);
}
errs() << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getName() << "\n";
@@ -399,7 +422,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(MI))
errs() << Indexes->getInstructionIndex(MI) << '\t';
- MI->print(errs(), TM);
+ MI->print(errs(), /*SkipOpers=*/true);
+ errs() << '\n';
}
void MachineVerifier::report(const char *msg,
@@ -411,36 +435,24 @@ void MachineVerifier::report(const char *msg,
errs() << "\n";
}
-void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveInterval &LI) {
- report(msg, MF);
- errs() << "- interval: " << LI << '\n';
-}
-
-void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveInterval &LI) {
- report(msg, MBB);
+void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
- report(msg, MBB);
- errs() << "- liverange: " << LR << '\n';
+void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const {
errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
if (LaneMask != 0)
- errs() << "- lanemask: " << format("%04X\n", LaneMask);
+ errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
+ errs() << "- liverange: " << LR << '\n';
}
-void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
- report(msg, MF);
- errs() << "- liverange: " << LR << '\n';
- errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
- if (LaneMask != 0)
- errs() << "- lanemask: " << format("%04X\n", LaneMask);
+void MachineVerifier::report_context(const LiveRange::Segment &S) const {
+ errs() << "- segment: " << S << '\n';
+}
+
+void MachineVerifier::report_context(const VNInfo &VNI) const {
+ errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n";
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -507,11 +519,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MRI->isSSA()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
- for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
- LE = MBB->livein_end();
- LI != LE; ++LI) {
- unsigned reg = *LI;
- if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ for (const auto &LI : MBB->liveins()) {
+ if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB != MBB->getParent()->begin()) {
report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
}
@@ -522,7 +531,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- if ((*I)->isLandingPad())
+ if ((*I)->isEHPad())
LandingPadSuccs.insert(*I);
if (!FunctionBlocks.count(*I))
report("MBB has successor that isn't part of the function.", MBB);
@@ -547,10 +556,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
const BasicBlock *BB = MBB->getBasicBlock();
+ const Function *Fn = MF->getFunction();
if (LandingPadSuccs.size() > 1 &&
!(AsmInfo &&
AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
- BB && isa<SwitchInst>(BB->getTerminator())))
+ BB && isa<SwitchInst>(BB->getTerminator())) &&
+ !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
report("MBB has more than one landing pad successor", MBB);
// Call AnalyzeBranch. If it succeeds, there several more conditions to check.
@@ -562,7 +573,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// check whether its answers match up with reality.
if (!TBB && !FBB) {
// Block falls through to its successor.
- MachineFunction::const_iterator MBBI = MBB;
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
++MBBI;
if (MBBI == MF->end()) {
// It's possible that the block legitimately ends with a noreturn
@@ -575,7 +586,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (!MBB->isSuccessor(MBBI)) {
+ } else if (!MBB->isSuccessor(&*MBBI)) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
@@ -613,7 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && !Cond.empty()) {
// Block conditionally branches somewhere, otherwise falls through.
- MachineFunction::const_iterator MBBI = MBB;
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
@@ -628,7 +639,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
- } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) {
report("MBB exits via conditional branch/fall-through but the CFG "
"successors don't match the actual successors!", MBB);
}
@@ -680,13 +691,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I) {
- if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
- for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true);
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
}
@@ -822,9 +832,12 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumDefs = MCID.getNumDefs();
+ if (MCID.getOpcode() == TargetOpcode::PATCHPOINT)
+ NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0;
// The first MCID.NumDefs operands must be explicit register defines
- if (MONum < MCID.getNumDefs()) {
+ if (MONum < NumDefs) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
@@ -972,13 +985,38 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
case MachineOperand::MO_FrameIndex:
if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
LiveInts && !LiveInts->isNotInMIMap(MI)) {
- LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ int FI = MO->getIndex();
+ LiveInterval &LI = LiveStks->getInterval(FI);
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+
+ bool stores = MI->mayStore();
+ bool loads = MI->mayLoad();
+ // For a memory-to-memory move, we need to check if the frame
+ // index is used for storing or loading, by inspecting the
+ // memory operands.
+ if (stores && loads) {
+ for (auto *MMO : MI->memoperands()) {
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+ if (PSV == nullptr) continue;
+ const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(PSV);
+ if (Value == nullptr) continue;
+ if (Value->getFrameIndex() != FI) continue;
+
+ if (MMO->isStore())
+ loads = false;
+ else
+ stores = false;
+ break;
+ }
+ if (loads == stores)
+ report("Missing fixed stack memoperand.", MI);
+ }
+ if (loads && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
errs() << "Live stack: " << LI << '\n';
}
- if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ if (stores && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
errs() << "Live stack: " << LI << '\n';
}
@@ -1387,40 +1425,39 @@ void MachineVerifier::verifyLiveIntervals() {
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const VNInfo *VNI, unsigned Reg,
- unsigned LaneMask) {
+ LaneBitmask LaneMask) {
if (VNI->isUnused())
return;
const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << '\n';
+ report("Value not live at VNInfo def and not marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
if (DefVNI != VNI) {
- report("Live segment at def has different valno", MF, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live\n";
+ report("Live segment at def has different VNInfo", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LR << '\n';
+ report("Invalid VNInfo definition index", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ report("PHIDef VNInfo is not defined at MBB start", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
return;
}
@@ -1428,8 +1465,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("No instruction at VNInfo def index", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
@@ -1457,60 +1495,67 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!hasDef) {
report("Defining instruction does not modify register", MI);
- errs() << "Valno #" << VNI->id << " in " << LR << '\n';
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
// Early clobber defs begin at USE slots, but other defs must begin at
// DEF slots.
if (isEarlyClobber) {
if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MBB, LR,
- Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("Early clobber def must be at an early-clobber slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
} else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("Non-PHI, non-early clobber def must be at a register slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
}
}
void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const LiveRange::const_iterator I,
- unsigned Reg, unsigned LaneMask) {
+ unsigned Reg, LaneBitmask LaneMask)
+{
const LiveRange::Segment &S = *I;
const VNInfo *VNI = S.valno;
assert(VNI && "Live segment has no valno");
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
- report("Foreign valno in live segment", MF, LR, Reg, LaneMask);
- errs() << S << " has a bad valno\n";
+ report("Foreign valno in live segment", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ report_context(*VNI);
}
if (VNI->isUnused()) {
- report("Live segment valno is marked unused", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Live segment valno is marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Bad start of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Bad end of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
@@ -1527,26 +1572,26 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const MachineInstr *MI =
LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
// The block slot must refer to a basic block boundary.
if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment ends at B slot of an instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
if (S.end.isDead()) {
// Segment ends on the dead slot.
// That means there must be a dead def.
if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB, LR,
- Reg, LaneMask);
- errs() << S << '\n';
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
@@ -1555,9 +1600,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ "redefined by an EC def in the same instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
@@ -1587,14 +1632,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
!hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
- errs() << S << " in " << LR << '\n';
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
}
}
// Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
+ MachineFunction::const_iterator MFI = MBB->getIterator();
// Is this live segment the beginning of a non-PHIDef VN?
if (S.start == VNI->def && !VNI->isPHIDef()) {
// Not live-in to any blocks.
@@ -1604,10 +1650,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
++MFI;
}
for (;;) {
- assert(LiveInts->isLiveInToMBB(LR, MFI));
+ assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI->isLandingPad()) {
+ MFI->isEHPad()) {
if (&*MFI == EndMBB)
break;
++MFI;
@@ -1616,7 +1662,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Is VNI a PHI-def in the current block?
bool IsPHI = VNI->isPHIDef() &&
- VNI->def == LiveInts->getMBBStartIdx(MFI);
+ VNI->def == LiveInts->getMBBStartIdx(&*MFI);
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
@@ -1626,22 +1672,23 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << '\n';
+ report("Register not marked live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ errs() << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+ << PEnd << '\n';
continue;
}
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LR, Reg,
- LaneMask);
+ report("Different value live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
errs() << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
+ << " live into BB#" << MFI->getNumber() << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << '\n';
}
}
if (&*MFI == EndMBB)
@@ -1651,7 +1698,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
+ LaneBitmask LaneMask) {
for (const VNInfo *VNI : LR.valnos)
verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
@@ -1664,24 +1711,35 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
- unsigned Mask = 0;
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0)
- report("Lane masks of sub ranges overlap in live interval", MF, LI);
- if ((SR.LaneMask & ~MaxMask) != 0)
- report("Subrange lanemask is invalid", MF, LI);
+ if ((Mask & SR.LaneMask) != 0) {
+ report("Lane masks of sub ranges overlap in live interval", MF);
+ report_context(LI);
+ }
+ if ((SR.LaneMask & ~MaxMask) != 0) {
+ report("Subrange lanemask is invalid", MF);
+ report_context(LI);
+ }
+ if (SR.empty()) {
+ report("Subrange must not be empty", MF);
+ report_context(SR, LI.reg, SR.LaneMask);
+ }
Mask |= SR.LaneMask;
verifyLiveRange(SR, LI.reg, SR.LaneMask);
- if (!LI.covers(SR))
- report("A Subrange is not covered by the main range", MF, LI);
+ if (!LI.covers(SR)) {
+ report("A Subrange is not covered by the main range", MF);
+ report_context(LI);
+ }
}
// Check the LI only has one connected component.
ConnectedVNInfoEqClasses ConEQ(*LiveInts);
unsigned NumComp = ConEQ.Classify(&LI);
if (NumComp > 1) {
- report("Multiple connected components in live interval", MF, LI);
+ report("Multiple connected components in live interval", MF);
+ report_context(LI);
for (unsigned comp = 0; comp != NumComp; ++comp) {
errs() << comp << ": valnos";
for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
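The verifier changes above feed into MachineVerifier::report()/report_context() whenever verification runs, either via llc -verify-machineinstrs or an explicit request. A hedged sketch of the explicit form, using the MachineFunction::verify(Pass *, const char *Banner) entry point visible in the hunk context; the pass body and rewriteSomething() helper are illustrative:

bool MyPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = rewriteSomething(MF);  // hypothetical transformation
#ifndef NDEBUG
  if (Changed)
    // The banner is printed once, before the first "Bad machine code" report.
    MF.verify(this, "After MyPass rewrite");
#endif
  return Changed;
}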
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index d343301..2c93792 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -548,7 +548,7 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineLoopInfo *MLI) {
- if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
return false; // Quick exit for basic blocks without PHIs.
const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 99bbad1..4cabc3a 100644
--- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -28,7 +28,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
// Usually, we just want to insert the copy before the first terminator
// instruction. However, for the edge going to a landing pad, we must insert
// the copy before the call/invoke instruction.
- if (!SuccMBB->isLandingPad())
+ if (!SuccMBB->isEHPad())
return MBB->getFirstTerminator();
// Discover any defs/uses in this basic block.
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
new file mode 100644
index 0000000..e73ba02
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,96 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ParallelCG.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/thread.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+
+static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
+ const Target *TheTarget, StringRef CPU, StringRef Features,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL,
+ TargetMachine::CodeGenFileType FileType) {
+ std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
+ M->getTargetTriple(), CPU, Features, Options, RM, CM, OL));
+
+ legacy::PassManager CodeGenPasses;
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
+ report_fatal_error("Failed to setup codegen");
+ CodeGenPasses.run(*M);
+}
+
+std::unique_ptr<Module>
+llvm::splitCodeGen(std::unique_ptr<Module> M,
+ ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU,
+ StringRef Features, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
+ TargetMachine::CodeGenFileType FileType) {
+ StringRef TripleStr = M->getTargetTriple();
+ std::string ErrMsg;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
+ if (!TheTarget)
+ report_fatal_error(Twine("Target not found: ") + ErrMsg);
+
+ if (OSs.size() == 1) {
+ codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM,
+ OL, FileType);
+ return M;
+ }
+
+ std::vector<thread> Threads;
+ SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the codegen.
+ // We do it by serializing partition modules to bitcode (while still on the
+ // main thread, in order to avoid data races) and spinning up new threads
+ // which deserialize the partitions into separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallVector<char, 0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
+ Threads.emplace_back(
+ [TheTarget, CPU, Features, Options, RM, CM, OL, FileType,
+ ThreadOS](const SmallVector<char, 0> &BC) {
+ LLVMContext Ctx;
+ ErrorOr<std::unique_ptr<Module>> MOrErr =
+ parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+ codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
+ Options, RM, CM, OL, FileType);
+ },
+ // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ });
+
+ for (thread &T : Threads)
+ T.join();
+
+ return {};
+}
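A hedged usage sketch for the new splitCodeGen() entry point, splitting one module across two in-memory object streams; the module M, the buffers, and the Reloc/CodeModel/opt-level values are illustrative, and M is assumed to carry a registered target triple:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static void compileInTwoParts(std::unique_ptr<Module> M) {
  SmallVector<char, 0> Buf0, Buf1;
  raw_svector_ostream OS0(Buf0), OS1(Buf1);
  raw_pwrite_stream *Streams[] = {&OS0, &OS1};

  // With more than one stream the module is split, each partition is compiled
  // on its own thread, and an empty pointer is returned; Buf0 and Buf1 then
  // hold the two object files.
  std::unique_ptr<Module> Rest =
      splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
                   TargetOptions(), Reloc::Default, CodeModel::Default,
                   CodeGenOpt::Default, TargetMachine::CGFT_ObjectFile);
  (void)Rest;
}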
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index 024d166..873f712 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -13,7 +13,11 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
@@ -52,9 +56,6 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
cl::desc("Disable Machine Common Subexpression Elimination"));
-static cl::opt<cl::boolOrDefault>
- EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
- cl::desc("enable the shrink-wrapping pass"));
static cl::opt<cl::boolOrDefault> OptimizeRegAlloc(
"optimize-regalloc", cl::Hidden,
cl::desc("Enable optimized register allocation compilation path."));
@@ -95,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
-// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it
-// wouldn't be part of the standard pass pipeline, and the target would just add
-// a PostRA scheduling pass wherever it wants.
-static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
+// Targets can return true in targetSchedulesPostRAScheduling() and
+// insert a PostRA scheduling pass wherever they want.
+cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
// Experimental option to run live interval analysis early.
@@ -188,6 +189,29 @@ char TargetPassConfig::ID = 0;
char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
+namespace {
+struct InsertedPass {
+ AnalysisID TargetPassID;
+ IdentifyingPassPtr InsertedPassID;
+ bool VerifyAfter;
+ bool PrintAfter;
+
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
+ VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {}
+
+ Pass *getInsertedPass() const {
+ assert(InsertedPassID.isValid() && "Illegal Pass ID!");
+ if (InsertedPassID.isInstance())
+ return InsertedPassID.getInstance();
+ Pass *NP = Pass::createPass(InsertedPassID.getID());
+ assert(NP && "Pass ID not registered");
+ return NP;
+ }
+};
+}
+
namespace llvm {
class PassConfigImpl {
public:
@@ -202,7 +226,7 @@ public:
/// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
/// is inserted after each instance of the first one.
- SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
+ SmallVector<InsertedPass, 4> InsertedPasses;
};
} // namespace llvm
@@ -217,7 +241,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
StopAfter(nullptr), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) {
+ DisableVerify(false), EnableTailMerge(true) {
Impl = new PassConfigImpl();
@@ -225,6 +249,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// including this pass itself.
initializeCodeGen(*PassRegistry::getPassRegistry());
+ // Also register alias analysis passes required by codegen passes.
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
@@ -232,14 +260,15 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- IdentifyingPassPtr InsertedPassID) {
+ IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
- Impl->InsertedPasses.push_back(P);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter,
+ PrintAfter);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -304,21 +333,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
}
// Add the passes after the pass P if there is any.
- for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
- I = Impl->InsertedPasses.begin(),
- E = Impl->InsertedPasses.end();
- I != E; ++I) {
- if ((*I).first == PassID) {
- assert((*I).second.isValid() && "Illegal Pass ID!");
- Pass *NP;
- if ((*I).second.isInstance())
- NP = (*I).second.getInstance();
- else {
- NP = Pass::createPass((*I).second.getID());
- assert(NP && "Pass ID not registered");
- }
- addPass(NP, false, false);
- }
+ for (auto IP : Impl->InsertedPasses) {
+ if (IP.TargetPassID == PassID)
+ addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter);
}
} else {
delete P;
@@ -380,10 +397,10 @@ void TargetPassConfig::addIRPasses() {
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
if (UseCFLAA)
- addPass(createCFLAliasAnalysisPass());
- addPass(createTypeBasedAliasAnalysisPass());
- addPass(createScopedNoAliasAAPass());
- addPass(createBasicAliasAnalysisPass());
+ addPass(createCFLAAWrapperPass());
+ addPass(createTypeBasedAAWrapperPass());
+ addPass(createScopedNoAliasAAWrapperPass());
+ addPass(createBasicAAWrapperPass());
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
@@ -461,7 +478,7 @@ void TargetPassConfig::addISelPrepare() {
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
- addPass(createSafeStackPass());
+ addPass(createSafeStackPass(TM));
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
@@ -539,8 +556,9 @@ void TargetPassConfig::addMachinePasses() {
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getEnableShrinkWrap())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&ShrinkWrapID);
+
addPass(&PrologEpilogCodeInserterID);
/// Add passes that optimize machine instructions after register allocation.
@@ -557,7 +575,10 @@ void TargetPassConfig::addMachinePasses() {
addPass(&ImplicitNullChecksID);
// Second pass scheduler.
- if (getOptLevel() != CodeGenOpt::None) {
+ // Let Target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM->targetSchedulesPostRAScheduling()) {
if (MISchedPostRA)
addPass(&PostMachineSchedulerID);
else
@@ -576,7 +597,10 @@ void TargetPassConfig::addMachinePasses() {
addPreEmitPass();
+ addPass(&FuncletLayoutID, false);
+
addPass(&StackMapLivenessID, false);
+ addPass(&LiveDebugValuesID, false);
AddingMachinePasses = false;
}
@@ -613,27 +637,12 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineCSEID, false);
addPass(&MachineSinkingID);
- addPass(&PeepholeOptimizerID, false);
+ addPass(&PeepholeOptimizerID);
// Clean-up the dead code that may have been generated by peephole
// rewriting.
addPass(&DeadMachineInstructionElimID);
}
-bool TargetPassConfig::getEnableShrinkWrap() const {
- switch (EnableShrinkWrapOpt) {
- case cl::BOU_UNSET:
- return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None;
- // If EnableShrinkWrap is set, it takes precedence on whatever the
- // target sets. The rational is that we assume we want to test
- // something related to shrink-wrapping.
- case cl::BOU_TRUE:
- return true;
- case cl::BOU_FALSE:
- return false;
- }
- llvm_unreachable("Invalid shrink-wrapping state");
-}
-
//===---------------------------------------------------------------------===//
/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
@@ -717,7 +726,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- addPass(RegAllocPass);
+ if (RegAllocPass)
+ addPass(RegAllocPass);
}
/// Add standard target-independent passes that are tightly coupled with
@@ -748,25 +758,27 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- // Add the selected register allocation pass.
- addPass(RegAllocPass);
+ if (RegAllocPass) {
+ // Add the selected register allocation pass.
+ addPass(RegAllocPass);
- // Allow targets to change the register assignments before rewriting.
- addPreRewrite();
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
- // Finally rewrite virtual registers.
- addPass(&VirtRegRewriterID);
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(&PostRAMachineLICMID);
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&PostRAMachineLICMID);
+ }
}
//===---------------------------------------------------------------------===//
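The extended insertPass() above lets a target attach its own pass after every instance of an existing one, while controlling the verify/print behaviour that addPass() applies afterwards. A sketch of how a TargetPassConfig subclass might use it; the custom pass ID and the override point chosen here are illustrative:

void MyTargetPassConfig::addPreRegAlloc() {
  // Run the target's fix-up pass after each MachineScheduler run, keep the
  // -verify-machineinstrs check afterwards, but skip the MI dump.
  insertPass(&MachineSchedulerID, &MyTargetFixupPassID,
             /*VerifyAfter=*/true, /*PrintAfter=*/false);
}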
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index ebe05e3..52b42b6 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -43,7 +43,7 @@
// - Optimize Loads:
//
// Loads that can be folded into a later instruction. A load is foldable
-// if it loads to virtual registers and the virtual register defined has
+// if it loads to virtual registers and the virtual register defined has
// a single use.
//
// - Optimize Copies and Bitcast (more generally, target specific copies):
@@ -98,6 +98,16 @@ static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
cl::desc("Disable advanced copy optimization"));
+static cl::opt<bool> DisableNAPhysCopyOpt(
+ "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable non-allocatable physical register copy optimization"));
+
+// Limit the number of PHI instructions to process
+// in PeepholeOptimizer::getNextSource.
+static cl::opt<unsigned> RewritePHILimit(
+ "rewrite-phi-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the length of PHI chains to lookup"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -105,8 +115,11 @@ STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
+STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
+ class ValueTrackerResult;
+
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -130,6 +143,10 @@ namespace {
}
}
+ /// \brief Track Def -> Use info used for rewriting copies.
+ typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
+ RewriteMapTy;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -137,17 +154,38 @@ namespace {
bool optimizeSelect(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeCondBranch(MachineInstr *MI);
- bool optimizeCopyOrBitcast(MachineInstr *MI);
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
- bool findNextSource(unsigned &Reg, unsigned &SubReg);
+ bool findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
+ /// \brief If copy instruction \p MI is a virtual register copy, track it in
+ /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
+ /// previously seen as a copy, replace the uses of this copy with the
+ /// previously seen copy's destination register.
+ bool foldRedundantCopy(MachineInstr *MI,
+ SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs);
+
+ /// \brief Is the register \p Reg a non-allocatable physical register?
+ bool isNAPhysCopy(unsigned Reg);
+
+ /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
+ /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+ /// non-allocatable physical register was previously copied to a virtual
+ /// register and hasn't been clobbered, the virt->phys copy can be
+ /// deleted.
+ bool foldRedundantNAPhysCopy(
+ MachineInstr *MI,
+ DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
+
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
@@ -171,6 +209,69 @@ namespace {
}
};
+ /// \brief Helper class to hold a reply for ValueTracker queries. Contains the
+ /// returned sources for a given search and the instructions where the sources
+ /// were tracked from.
+ class ValueTrackerResult {
+ private:
+ /// Track all sources found by one ValueTracker query.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
+
+ /// Instruction using the sources in 'RegSrcs'.
+ const MachineInstr *Inst;
+
+ public:
+ ValueTrackerResult() : Inst(nullptr) {}
+ ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
+ addSource(Reg, SubReg);
+ }
+
+ bool isValid() const { return getNumSources() > 0; }
+
+ void setInst(const MachineInstr *I) { Inst = I; }
+ const MachineInstr *getInst() const { return Inst; }
+
+ void clear() {
+ RegSrcs.clear();
+ Inst = nullptr;
+ }
+
+ void addSource(unsigned SrcReg, unsigned SrcSubReg) {
+ RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg));
+ }
+
+ void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
+ assert(Idx < getNumSources() && "Reg pair source out of index");
+ RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg);
+ }
+
+ int getNumSources() const { return RegSrcs.size(); }
+
+ unsigned getSrcReg(int Idx) const {
+ assert(Idx < getNumSources() && "Reg source out of index");
+ return RegSrcs[Idx].Reg;
+ }
+
+ unsigned getSrcSubReg(int Idx) const {
+ assert(Idx < getNumSources() && "SubReg source out of index");
+ return RegSrcs[Idx].SubReg;
+ }
+
+ bool operator==(const ValueTrackerResult &Other) {
+ if (Other.getInst() != getInst())
+ return false;
+
+ if (Other.getNumSources() != getNumSources())
+ return false;
+
+ for (int i = 0, e = Other.getNumSources(); i != e; ++i)
+ if (Other.getSrcReg(i) != getSrcReg(i) ||
+ Other.getSrcSubReg(i) != getSrcSubReg(i))
+ return false;
+ return true;
+ }
+ };
+
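A brief illustrative aside (not part of the patch): the sketch below shows how a client might consume a ValueTrackerResult. Only the ValueTrackerResult API defined above is real; the helper itself is hypothetical and assumes it lives in the same translation unit.

static void visitSources(const ValueTrackerResult &Res) {
  // An invalid result means no alternative source was found.
  if (!Res.isValid())
    return;
  for (int i = 0, e = Res.getNumSources(); i != e; ++i) {
    unsigned Reg = Res.getSrcReg(i);
    unsigned SubReg = Res.getSrcSubReg(i);
    // A single (Reg, SubReg) pair comes from a plain copy-like instruction;
    // several pairs mean the value was defined by the PHI returned by
    // Res.getInst().
    (void)Reg;
    (void)SubReg;
  }
}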
/// \brief Helper class to track the possible sources of a value defined by
/// a (chain of) copy related instructions.
/// Given a definition (instruction and definition index), this class
@@ -213,23 +314,25 @@ namespace {
/// \brief Dispatcher to the right underlying implementation of
/// getNextSource.
- bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceImpl();
/// \brief Specialized version of getNextSource for Copy instructions.
- bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromCopy();
/// \brief Specialized version of getNextSource for Bitcast instructions.
- bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromBitcast();
/// \brief Specialized version of getNextSource for RegSequence
/// instructions.
- bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromRegSequence();
/// \brief Specialized version of getNextSource for InsertSubreg
/// instructions.
- bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromInsertSubreg();
/// \brief Specialized version of getNextSource for ExtractSubreg
/// instructions.
- bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromExtractSubreg();
/// \brief Specialized version of getNextSource for SubregToReg
/// instructions.
- bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromSubregToReg();
+ /// \brief Specialized version of getNextSource for PHI instructions.
+ ValueTrackerResult getNextSourceFromPHI();
public:
/// \brief Create a ValueTracker instance for the value defined by \p Reg.
@@ -276,16 +379,10 @@ namespace {
/// \brief Following the use-def chain, get the next available source
/// for the tracked value.
- /// When the returned value is not nullptr, \p SrcReg gives the register
- /// that contain the tracked value.
- /// \note The sub register index returned in \p SrcSubReg must be used
- /// on \p SrcReg to access the actual value.
- /// \return Unless the returned value is nullptr (i.e., no source found),
- /// \p SrcReg gives the register of the next source used in the returned
- /// instruction and \p SrcSubReg the sub-register index to be used on that
- /// source to get the tracked value. When nullptr is returned, no
- /// alternative source has been found.
- const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
+ /// \return A ValueTrackerResult containing a set of registers
+ /// and sub registers with tracked values. A ValueTrackerResult with
+ /// an empty set of registers means no source was found.
+ ValueTrackerResult getNextSource();
/// \brief Get the last register where the initial value can be found.
/// Initially this is the register of the definition.
@@ -303,11 +400,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
-/// a single register and writes a single register and it does not modify the
-/// source, and if the source value is preserved as a sub-register of the
-/// result, then replace all reachable uses of the source with the subreg of the
-/// result.
+/// If instruction is a copy-like instruction, i.e. it reads a single register
+/// and writes a single register and it does not modify the source, and if the
+/// source value is preserved as a sub-register of the result, then replace all
+/// reachable uses of the source with the subreg of the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
@@ -458,10 +554,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
-/// optimizeCmpInstr - If the instruction is a compare and the previous
-/// instruction it's comparing against all ready sets (or could be modified to
-/// set) the same flag as the compare, then we can remove the comparison and use
-/// the flag from the previous instruction.
+/// If the instruction is a compare and the previous instruction it's comparing
+/// against already sets (or could be modified to set) the same flag as the
+/// compare, then we can remove the comparison and use the flag from the
+/// previous instruction.
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
// If this instruction is a comparison against zero and isn't comparing a
@@ -506,88 +602,138 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
return TII->optimizeCondBranch(MI);
}
-/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
-/// share the same register file.
-static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
- const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) {
- // Same register class.
- if (DefRC == SrcRC)
- return true;
-
- // Both operands are sub registers. Check if they share a register class.
- unsigned SrcIdx, DefIdx;
- if (SrcSubReg && DefSubReg)
- return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
- SrcIdx, DefIdx) != nullptr;
- // At most one of the register is a sub register, make it Src to avoid
- // duplicating the test.
- if (!SrcSubReg) {
- std::swap(DefSubReg, SrcSubReg);
- std::swap(DefRC, SrcRC);
- }
-
- // One of the register is a sub register, check if we can get a superclass.
- if (SrcSubReg)
- return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
- // Plain copy.
- return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
-}
-
/// \brief Try to find the next source that shares the same register file
/// for the value defined by \p Reg and \p SubReg.
-/// When true is returned, \p Reg and \p SubReg are updated with the
-/// register number and sub-register index of the new source.
+/// When true is returned, the \p RewriteMap can be used by the client to
+/// retrieve all Def -> Use entries along the way up to the next source. Any
+/// found Use that is not itself a key for another entry is the next source to
+/// use. During the search for the next source, multiple sources can be found
+/// given multiple incoming sources of a PHI instruction. In this case, we
+/// look in each PHI source for the next source; all found next sources must
+/// share the same register file as \p Reg and \p SubReg. The client should
+/// then be able to rewrite all intermediate PHIs to get the next source.
/// \return False if no alternative sources are available. True otherwise.
-bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
+bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap) {
// Do not try to find a new source for a physical register.
// So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
-
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
- unsigned DefSubReg = SubReg;
-
- unsigned Src;
- unsigned SrcSubReg;
- bool ShouldRewrite = false;
-
- // Follow the chain of copies until we reach the top of the use-def chain
- // or find a more suitable source.
- ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
- do {
- unsigned CopySrcReg, CopySrcSubReg;
- if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
- break;
- Src = CopySrcReg;
- SrcSubReg = CopySrcSubReg;
-
- // Do not extend the live-ranges of physical registers as they add
- // constraints to the register allocator.
- // Moreover, if we want to extend the live-range of a physical register,
- // unlike SSA virtual register, we will have to check that they are not
- // redefine before the related use.
- if (TargetRegisterInfo::isPhysicalRegister(Src))
- break;
- const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
+ TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
+ SrcToLook.push_back(CurSrcPair);
+
+ unsigned PHICount = 0;
+ while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
+ TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
+ // As explained above, do not handle physical registers
+ if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
+ return false;
- // If this source does not incur a cross register bank copy, use it.
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
- SrcSubReg);
- } while (!ShouldRewrite);
+ CurSrcPair = Pair;
+ ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
+ !DisableAdvCopyOpt, TII);
+ ValueTrackerResult Res;
+ bool ShouldRewrite = false;
+
+ do {
+ // Follow the chain of copies until we reach the top of the use-def chain
+ // or find a more suitable source.
+ Res = ValTracker.getNextSource();
+ if (!Res.isValid())
+ break;
+
+ // Insert the Def -> Use entry for the recently found source.
+ ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
+ if (CurSrcRes.isValid()) {
+ assert(CurSrcRes == Res && "ValueTrackerResult found must match");
+ // An existing entry with multiple sources is a PHI cycle we must avoid.
+ // Otherwise it's an entry with a valid next source we already found.
+ if (CurSrcRes.getNumSources() > 1) {
+ DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
+ return false;
+ }
+ break;
+ }
+ RewriteMap.insert(std::make_pair(CurSrcPair, Res));
+
+ // A ValueTrackerResult usually has one source unless it's the result from
+ // a PHI instruction. Add the found PHI edges to be looked up further.
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs > 1) {
+ PHICount++;
+ for (unsigned i = 0; i < NumSrcs; ++i)
+ SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
+ Res.getSrcReg(i), Res.getSrcSubReg(i)));
+ break;
+ }
- // If we did not find a more suitable source, there is nothing to optimize.
- if (!ShouldRewrite || Src == Reg)
+ CurSrcPair.Reg = Res.getSrcReg(0);
+ CurSrcPair.SubReg = Res.getSrcSubReg(0);
+ // Do not extend the live-ranges of physical registers as they add
+ // constraints to the register allocator. Moreover, if we want to extend
+ // the live-range of a physical register, unlike an SSA virtual register,
+ // we will have to check that it isn't redefined before the related use.
+ if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ return false;
+
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
+ ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
+ CurSrcPair.SubReg);
+ } while (!ShouldRewrite);
+
+ // Continue looking for new sources...
+ if (Res.isValid())
+ continue;
+
+ // Do not continue searching for a new source if there's at least
+ // one use-def which cannot be rewritten.
+ if (!ShouldRewrite)
+ return false;
+ }
+
+ if (PHICount >= RewritePHILimit) {
+ DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
+ }
- Reg = Src;
- SubReg = SrcSubReg;
- return true;
+ // If we did not find a more suitable source, there is nothing to optimize.
+ return CurSrcPair.Reg != Reg;
+}
+
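Editorial aside, a minimal sketch (not in the patch) of how the RewriteMap filled by findNextSource is meant to be walked: follow single-source entries until reaching a pair that is no longer a key, which is the next source to use. The helper name is hypothetical and mirrors the single-source path of getNewSource further below.

static TargetInstrInfo::RegSubRegPair
followRewriteChain(const PeepholeOptimizer::RewriteMapTy &RewriteMap,
                   TargetInstrInfo::RegSubRegPair Start) {
  TargetInstrInfo::RegSubRegPair Cur = Start;
  while (true) {
    ValueTrackerResult Res = RewriteMap.lookup(Cur);
    // A pair that is not a key in the map is the next source to use.
    if (!Res.isValid())
      return Cur;
    // More than one source corresponds to a PHI; that case needs the
    // PHI-rewriting path (insertPHI below) rather than a plain lookup.
    if (Res.getNumSources() != 1)
      return Cur;
    Cur = TargetInstrInfo::RegSubRegPair(Res.getSrcReg(0),
                                         Res.getSrcSubReg(0));
  }
}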
+/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// guaranteed to have the same register class. This is necessary whenever we
+/// successfully traverse a PHI instruction and find suitable sources coming
+/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
+/// suitable to be used in a new COPY instruction.
+static MachineInstr *
+insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
+ MachineInstr *OrigPHI) {
+ assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
+
+ const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+ unsigned NewVR = MRI->createVirtualRegister(NewRC);
+ MachineBasicBlock *MBB = OrigPHI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
+ TII->get(TargetOpcode::PHI), NewVR);
+
+ unsigned MBBOpIdx = 2;
+ for (auto RegPair : SrcRegs) {
+ MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
+ MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
+ // Since we're extending the lifetime of RegPair.Reg, clear the
+ // kill flags to account for that and make sure RegPair.Reg reaches
+ // the new PHI.
+ MRI->clearKillFlags(RegPair.Reg);
+ MBBOpIdx += 2;
+ }
+
+ return MIB;
}
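Another hedged sketch (not in the patch): how a caller turns the PHI produced by insertPHI back into the RegSubRegPair a copy rewriter consumes. The helper and its fixed two-source shape are purely illustrative; getNewSource below does the real, recursive version.

static TargetInstrInfo::RegSubRegPair
buildPHISource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
               TargetInstrInfo::RegSubRegPair SrcA,
               TargetInstrInfo::RegSubRegPair SrcB, MachineInstr *OrigPHI) {
  SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewSrcs;
  NewSrcs.push_back(SrcA);
  NewSrcs.push_back(SrcB);
  // insertPHI creates the destination register with the class of the first
  // source and places the new PHI right before OrigPHI.
  MachineInstr *NewPHI = insertPHI(MRI, TII, NewSrcs, OrigPHI);
  const MachineOperand &MODef = NewPHI->getOperand(0);
  return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
}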
namespace {
@@ -624,7 +770,7 @@ public:
/// This source defines the whole definition, i.e.,
/// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
///
- /// The second and subsequent calls will return false, has there is only one
+ /// The second and subsequent calls will return false, as there is only one
/// rewritable source.
///
/// \return True if a rewritable source has been found, false otherwise.
@@ -632,9 +778,9 @@ public:
virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
unsigned &TrackReg,
unsigned &TrackSubReg) {
- // If CurrentSrcIdx == 1, this means this function has already been
- // called once. CopyLike has one defintiion and one argument, thus,
- // there is nothing else to rewrite.
+ // If CurrentSrcIdx == 1, this means this function has already been called
+ // once. CopyLike has one definition and one argument, thus, there is
+ // nothing else to rewrite.
if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
return false;
// This is the first call to getNextRewritableSource.
@@ -653,7 +799,7 @@ public:
/// \brief Rewrite the current source with \p NewReg and \p NewSubReg
/// if possible.
- /// \return True if the rewritting was possible, false otherwise.
+ /// \return True if the rewriting was possible, false otherwise.
virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
return false;
@@ -662,6 +808,157 @@ public:
MOSrc.setSubReg(NewSubReg);
return true;
}
+
+ /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
+ /// the new source to use for rewrite. If \p HandleMultipleSources is true and
+ /// multiple sources for a given \p Def are found along the way, we found a
+ /// PHI instruction that needs to be rewritten.
+ /// TODO: HandleMultipleSources should be removed once we test PHI handling
+ /// with coalescable copies.
+ TargetInstrInfo::RegSubRegPair
+ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap,
+ bool HandleMultipleSources = true) {
+
+ TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
+ do {
+ ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
+ // If there are no entries on the map, LookupSrc is the new source.
+ if (!Res.isValid())
+ return LookupSrc;
+
+ // There's only one source for this definition, keep searching...
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs == 1) {
+ LookupSrc.Reg = Res.getSrcReg(0);
+ LookupSrc.SubReg = Res.getSrcSubReg(0);
+ continue;
+ }
+
+ // TODO: Remove once multiple srcs w/ coalescable copies are supported.
+ if (!HandleMultipleSources)
+ break;
+
+ // Multiple sources, recurse into each source to find a new source
+ // for it. Then, rewrite the PHI according to its new edges.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
+ for (unsigned i = 0; i < NumSrcs; ++i) {
+ TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
+ Res.getSrcSubReg(i));
+ NewPHISrcs.push_back(
+ getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
+ }
+
+ // Build the new PHI node and return its def register as the new source.
+ MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
+ MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
+ DEBUG(dbgs() << "-- getNewSource\n");
+ DEBUG(dbgs() << " Replacing: " << *OrigPHI);
+ DEBUG(dbgs() << " With: " << *NewPHI);
+ const MachineOperand &MODef = NewPHI->getOperand(0);
+ return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+
+ } while (1);
+
+ return TargetInstrInfo::RegSubRegPair(0, 0);
+ }
+
+ /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
+ /// and create a new COPY instruction. More info about RewriteMap in
+ /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+ /// Uncoalescable copies, since they are copy like instructions that aren't
+ /// recognized by the register allocator.
+ virtual MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) {
+ return nullptr;
+ }
+};
+
+/// \brief Helper class to rewrite uncoalescable copy like instructions
+/// into new COPY (coalescable friendly) instructions.
+class UncoalescableRewriter : public CopyRewriter {
+protected:
+ const TargetInstrInfo &TII;
+ MachineRegisterInfo &MRI;
+ /// The number of defs in the bitcast
+ unsigned NumDefs;
+
+public:
+ UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI)
+ : CopyRewriter(MI), TII(TII), MRI(MRI) {
+ NumDefs = MI.getDesc().getNumDefs();
+ }
+
+ /// \brief Get the next rewritable def source (TrackReg, TrackSubReg)
+ /// All such sources need to be considered rewritable in order to
+ /// rewrite an uncoalescable copy-like instruction. This method returns
+ /// each definition that must be checked for rewritability.
+ ///
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // Find the next non-dead definition and continue from there.
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+
+ while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
+ ++CurrentSrcIdx;
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+ }
+
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+
+ CurrentSrcIdx++;
+ return true;
+ }
+
+ /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
+ /// and create a new COPY instruction. More info about RewriteMap in
+ /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+ /// Uncoalescable copies, since they are copy like instructions that aren't
+ /// recognized by the register allocator.
+ MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ "We do not rewrite physical registers");
+
+ // Find the new source to use in the COPY rewrite.
+ TargetInstrInfo::RegSubRegPair NewSrc =
+ getNewSource(&MRI, &TII, Def, RewriteMap);
+
+ // Insert the COPY.
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
+ unsigned NewVR = MRI.createVirtualRegister(DefRC);
+
+ MachineInstr *NewCopy =
+ BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewVR)
+ .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
+
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ if (Def.SubReg)
+ NewCopy->getOperand(0).setIsUndef();
+
+ DEBUG(dbgs() << "-- RewriteSource\n");
+ DEBUG(dbgs() << " Replacing: " << CopyLike);
+ DEBUG(dbgs() << " With: " << *NewCopy);
+ MRI.replaceRegWith(Def.Reg, NewVR);
+ MRI.clearKillFlags(NewVR);
+
+ // We extended the lifetime of NewSrc.Reg, clear the kill flags to
+ // account for that.
+ MRI.clearKillFlags(NewSrc.Reg);
+
+ return NewCopy;
+ }
};
/// \brief Specialized rewriter for INSERT_SUBREG instruction.
@@ -699,7 +996,7 @@ public:
// partial definition.
TrackReg = MODef.getReg();
if (MODef.getSubReg())
- // Bails if we have to compose sub-register indices.
+ // Bail if we have to compose sub-register indices.
return false;
TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
return true;
@@ -740,7 +1037,7 @@ public:
CurrentSrcIdx = 1;
const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
SrcReg = MOExtractedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if (MOExtractedReg.getSubReg())
return false;
@@ -818,7 +1115,7 @@ public:
}
const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
SrcReg = MOInsertedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if ((SrcSubReg = MOInsertedReg.getSubReg()))
return false;
@@ -828,7 +1125,7 @@ public:
const MachineOperand &MODef = CopyLike.getOperand(0);
TrackReg = MODef.getReg();
- // If we have to compose sub-registers, bails.
+ // If we have to compose sub-registers, bail.
return MODef.getSubReg() == 0;
}
@@ -850,7 +1147,13 @@ public:
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
/// if no rewriter works for \p MI.
static CopyRewriter *getCopyRewriter(MachineInstr &MI,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ // Handle uncoalescable copy-like instructions.
+ if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()))
+ return new UncoalescableRewriter(MI, TII, MRI);
+
switch (MI.getOpcode()) {
default:
return nullptr;
@@ -874,7 +1177,7 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI,
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
-/// overconstraint the register allocator, but replaces some operands
+/// overconstrain the register allocator, but replaces some operands
/// when possible.
/// \pre isCoalescableCopy(*MI) is true.
/// \return True, when \p MI has been rewritten. False otherwise.
@@ -889,25 +1192,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
bool Changed = false;
// Get the right rewriter for the current copy.
- std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
- // If none exists, bails out.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
if (!CpyRewriter)
return false;
// Rewrite each rewritable source.
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
TrackSubReg)) {
- unsigned NewSrc = TrackReg;
- unsigned NewSubReg = TrackSubReg;
- // Try to find a more suitable source.
- // If we failed to do so, or get the actual source,
- // move to the next source.
- if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
+ // Keep track of PHI nodes and their incoming edges when looking for sources.
+ RewriteMapTy RewriteMap;
+ // Try to find a more suitable source. If we failed to do so, or get the
+ // actual source, move to the next source.
+ if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
+ continue;
+
+ // Get the new source to rewrite. TODO: Only enable handling of multiple
+ // sources (PHIs) once we have a motivating example and testcases for it.
+ TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
+ TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
+ MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
+ if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
continue;
+
// Rewrite source.
- if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
+ if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
// We may have extended the live-range of NewSrc, account for that.
- MRI->clearKillFlags(NewSrc);
+ MRI->clearKillFlags(NewSrc.Reg);
Changed = true;
}
}
@@ -936,61 +1247,53 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
// Check if we can rewrite all the values defined by this instruction.
- SmallVector<
- std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
- 4> RewritePairs;
- for (const MachineOperand &MODef : MI->defs()) {
- if (MODef.isDead())
- // We can ignore those.
- continue;
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source by generating new COPYs. This works
+ // differently from optimizeCoalescableCopy since it first makes sure that all
+ // definitions can be rewritten.
+ RewriteMapTy RewriteMap;
+ unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
+ while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
+ CopyDefSubReg)) {
// If a physical register is here, this is probably for a good reason.
// Do not rewrite that.
- if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
return false;
// If we do not know how to rewrite this definition, there is no point
// in trying to kill this instruction.
- TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
- TargetInstrInfo::RegSubRegPair Src = Def;
- if (!findNextSource(Src.Reg, Src.SubReg))
+ TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
+ if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
return false;
- RewritePairs.push_back(std::make_pair(Def, Src));
+
+ RewritePairs.push_back(Def);
}
+
// The change is possible for all defs, do it.
- for (const auto &PairDefSrc : RewritePairs) {
- const auto &Def = PairDefSrc.first;
- const auto &Src = PairDefSrc.second;
+ for (const auto &Def : RewritePairs) {
// Rewrite the "copy" in a way the register coalescer understands.
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
- "We do not rewrite physical registers");
- const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
- unsigned NewVR = MRI->createVirtualRegister(DefRC);
- MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- NewVR).addReg(Src.Reg, 0, Src.SubReg);
- NewCopy->getOperand(0).setSubReg(Def.SubReg);
- if (Def.SubReg)
- NewCopy->getOperand(0).setIsUndef();
+ MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
+ assert(NewCopy && "Should be able to always generate a new copy");
LocalMIs.insert(NewCopy);
- MRI->replaceRegWith(Def.Reg, NewVR);
- MRI->clearKillFlags(NewVR);
- // We extended the lifetime of Src.
- // Clear the kill flags to account for that.
- MRI->clearKillFlags(Src.Reg);
}
+
// MI is now dead.
MI->eraseFromParent();
++NumUncoalescableCopies;
return true;
}
-/// isLoadFoldable - Check whether MI is a candidate for folding into a later
-/// instruction. We only fold loads to virtual registers and the virtual
-/// register defined has a single use.
+/// Check whether MI is a candidate for folding into a later instruction.
+/// We only fold loads to virtual registers and the virtual register defined
+/// has a single use.
bool PeepholeOptimizer::isLoadFoldable(
- MachineInstr *MI,
- SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
+ MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI->canFoldAsLoad() || !MI->mayLoad())
return false;
const MCInstrDesc &MCID = MI->getDesc();
@@ -1010,9 +1313,9 @@ bool PeepholeOptimizer::isLoadFoldable(
return false;
}
-bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+bool PeepholeOptimizer::isMoveImmediate(
+ MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
if (!MI->isMoveImmediate())
return false;
@@ -1028,23 +1331,26 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
return false;
}
-/// foldImmediate - Try folding register operands that are defined by move
-/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// Try folding register operands that are defined by move immediate
+/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
- assert(II != ImmDefMIs.end());
+ assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
++NumImmFold;
return true;
@@ -1053,6 +1359,117 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
return false;
}
+// FIXME: This is very simple and misses some cases which should be handled when
+// motivating examples are found.
+//
+// The copy rewriting logic should look at uses as well as defs and be able to
+// eliminate copies across blocks.
+//
+// Later copies that are subregister extracts will also not be eliminated since
+// only the first copy is considered.
+//
+// e.g.
+// %vreg1 = COPY %vreg0
+// %vreg2 = COPY %vreg0:sub1
+//
+// Should replace %vreg2 uses with %vreg1:sub1
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ if (CopySrcRegs.insert(SrcReg).second) {
+ // First copy of this reg seen.
+ CopyMIs.insert(std::make_pair(SrcReg, MI));
+ return false;
+ }
+
+ MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
+
+ // Can't replace different subregister extracts.
+ if (SrcSubReg != PrevSrcSubReg)
+ return false;
+
+ unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+
+ // Only replace if the copy register class is the same.
+ //
+ // TODO: If we have multiple copies to different register classes, we may want
+ // to track multiple copies of the same source register.
+ if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
+ return false;
+
+ MRI->replaceRegWith(DstReg, PrevDstReg);
+
+ // Lifetime of the previous copy has been extended.
+ MRI->clearKillFlags(PrevDstReg);
+ return true;
+}
+
+bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
+ return TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !MRI->isAllocatable(Reg);
+}
+
+bool PeepholeOptimizer::foldRedundantNAPhysCopy(
+ MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ if (DisableNAPhysCopyOpt)
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // %vreg = COPY %PHYSREG
+ // Avoid using a data structure which can track multiple live non-allocatable
+ // phys->virt copies since LLVM doesn't seem to do this.
+ NAPhysToVirtMIs.insert({SrcReg, MI});
+ return false;
+ }
+
+ if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ return false;
+
+ // %PHYSREG = COPY %vreg
+ auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
+ if (PrevCopy == NAPhysToVirtMIs.end()) {
+ // We can't remove the copy: there was an intervening clobber of the
+ // non-allocatable physical register after the copy to virtual.
+ DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI
+ << '\n');
+ return false;
+ }
+
+ unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ if (PrevDstReg == SrcReg) {
+ // Remove the virt->phys copy: we saw the virtual register definition, and
+ // the non-allocatable physical register's state hasn't changed since then.
+ DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n');
+ ++NumNAPhysCopies;
+ return true;
+ }
+
+ // Potential missed optimization opportunity: we saw a different virtual
+ // register get a copy of the non-allocatable physical register, and we only
+ // track one such copy. Avoid getting confused by this new non-allocatable
+ // physical register definition, and remove it from the tracked copies.
+ DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n');
+ NAPhysToVirtMIs.erase(PrevCopy);
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
@@ -1070,9 +1487,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
-
+ for (MachineBasicBlock &MBB : MF) {
bool SeenMoveImm = false;
// During this forward scan, at some point it needs to answer the question
@@ -1086,8 +1501,19 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
- for (MachineBasicBlock::iterator
- MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ // Track when a non-allocatable physical register is copied to a virtual
+ // register so that useless moves can be removed.
+ //
+ // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
+ // without any intervening re-definition of %PHYSREG.
+ DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+
+ // Set of virtual registers that are copied from.
+ SmallSet<unsigned, 4> CopySrcRegs;
+ DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE; ) {
MachineInstr *MI = &*MII;
// We may be erasing MI below, increment MII now.
++MII;
@@ -1097,20 +1523,60 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- // If there exists an instruction which belongs to the following
- // categories, we will discard the load candidates.
- if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
- MI->isKill() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects()) {
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates.
+ if (MI->isLoadFoldBarrier())
FoldAsLoadDefCandidates.clear();
+
+ if (MI->isPosition() || MI->isPHI())
+ continue;
+
+ if (!MI->isCopy()) {
+ for (const auto &Op : MI->operands()) {
+ // Visit all operands: definitions can be implicit or explicit.
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ if (Op.isDef() && isNAPhysCopy(Reg)) {
+ const auto &Def = NAPhysToVirtMIs.find(Reg);
+ if (Def != NAPhysToVirtMIs.end()) {
+ // A new definition of the non-allocatable physical register
+ // invalidates previous copies.
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ } else if (Op.isRegMask()) {
+ const uint32_t *RegMask = Op.getRegMask();
+ for (auto &RegMI : NAPhysToVirtMIs) {
+ unsigned Def = RegMI.first;
+ if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ }
+ }
+ }
+
+ if (MI->isImplicitDef() || MI->isKill())
+ continue;
+
+ if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
+ // Blow away all knowledge of non-allocatable physical registers since we
+ // don't know what's correct anymore.
+ //
+ // FIXME: handle explicit asm clobbers.
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
+ << '\n');
+ NAPhysToVirtMIs.clear();
continue;
}
- if (MI->mayStore() || MI->isCall())
- FoldAsLoadDefCandidates.clear();
if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
- (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) ||
(MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
// MI is deleted.
LocalMIs.erase(MI);
@@ -1129,17 +1595,26 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI->isCopy() &&
+ (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) {
+ LocalMIs.erase(MI);
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
- Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ Changed |= optimizeExtInstr(MI, &MBB, LocalMIs);
// optimizeExtInstr might have created new instructions after MI
// and before the already incremented MII. Adjust MII so that the
// next iteration sees the new instructions.
MII = MI;
++MII;
if (SeenMoveImm)
- Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs);
}
// Check whether MI is a load candidate for folding into a later
@@ -1190,8 +1665,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
assert(Def->isCopy() && "Invalid definition");
// Copy instruction are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
@@ -1199,30 +1673,27 @@ bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
- // Bails as we do not support composing subreg yet.
- return false;
+ // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
// Otherwise, we want the whole source.
const MachineOperand &Src = Def->getOperand(1);
- SrcReg = Src.getReg();
- SrcSubReg = Src.getSubReg();
- return true;
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
-bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
if (Def->hasUnmodeledSideEffects())
- return false;
+ return ValueTrackerResult();
// Bitcasts with more than one def are not supported.
if (Def->getDesc().getNumDefs() != 1)
- return false;
+ return ValueTrackerResult();
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of the src.
- // Bails as we do not support composing subreg yet.
- return false;
+ // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
unsigned SrcIdx = Def->getNumOperands();
for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
@@ -1230,25 +1701,25 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
const MachineOperand &MO = Def->getOperand(OpIdx);
if (!MO.isReg() || !MO.getReg())
continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
assert(!MO.isDef() && "We should have skipped all the definitions by now");
if (SrcIdx != EndOpIdx)
// Multiple sources?
- return false;
+ return ValueTrackerResult();
SrcIdx = OpIdx;
}
const MachineOperand &Src = Def->getOperand(SrcIdx);
- SrcReg = Src.getReg();
- SrcSubReg = Src.getSubReg();
- return true;
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
-bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subregs, bail out.
// The case we are checking is Def.<subreg> = REG_SEQUENCE.
// This should almost never happen as the SSA property is tracked at
// the register level (as opposed to the subreg level).
@@ -1262,16 +1733,16 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
// have this case.
// If we can ascertain (or force) that this never happens, we could
// turn that into an assertion.
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the REG_SEQUENCE here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
- return false;
+ return ValueTrackerResult();
// We are looking at:
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
@@ -1279,41 +1750,38 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
for (auto &RegSeqInput : RegSeqInputRegs) {
if (RegSeqInput.SubIdx == DefSubReg) {
if (RegSeqInput.SubReg)
- // Bails if we have to compose sub registers.
- return false;
+ // Bail if we have to compose sub registers.
+ return ValueTrackerResult();
- SrcReg = RegSeqInput.Reg;
- SrcSubReg = RegSeqInput.SubReg;
- return true;
+ return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
}
}
// If the subreg we are tracking is super-defined by another subreg,
// we could follow this value. However, this would require to compose
// the subreg and we do not do that for now.
- return false;
+ return ValueTrackerResult();
}
-bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subreg, bail out.
// Same remark as getNextSourceFromRegSequence.
// I.e., this may be turned into an assert.
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the REG_SEQUENCE here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
TargetInstrInfo::RegSubRegPair BaseReg;
TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
- return false;
+ return ValueTrackerResult();
// We are looking at:
// Def = INSERT_SUBREG v0, v1, sub1
@@ -1323,9 +1791,7 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
// #1 Check if the inserted register matches the required sub index.
if (InsertedReg.SubIdx == DefSubReg) {
- SrcReg = InsertedReg.Reg;
- SrcSubReg = InsertedReg.SubReg;
- return true;
+ return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
}
// #2 Otherwise, if the sub register we are looking for is not partial
// defined by the inserted element, we can look through the main
@@ -1333,10 +1799,10 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
const MachineOperand &MODef = Def->getOperand(DefIdx);
// If the result register (Def) and the base register (v0) do not
// have the same register class or if we have to compose
- // subregisters, bails out.
+ // subregisters, bail out.
if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
BaseReg.SubReg)
- return false;
+ return ValueTrackerResult();
// Get the TRI and check if the inserted sub-register overlaps with the
// sub-register we are tracking.
@@ -1344,121 +1810,138 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
if (!TRI ||
(TRI->getSubRegIndexLaneMask(DefSubReg) &
TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
- return false;
+ return ValueTrackerResult();
// At this point, the value is available in v0 via the same subreg
// we used for Def.
- SrcReg = BaseReg.Reg;
- SrcSubReg = DefSubReg;
- return true;
+ return ValueTrackerResult(BaseReg.Reg, DefSubReg);
}
-bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
assert((Def->isExtractSubreg() ||
Def->isExtractSubregLike()) && "Invalid definition");
// We are looking at:
// Def = EXTRACT_SUBREG v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Indeed, if DefSubReg != 0, we would have to compose it with sub0.
if (DefSubReg)
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the EXTRACT_SUBREG here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
- return false;
+ return ValueTrackerResult();
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
if (ExtractSubregInputReg.SubReg)
- return false;
+ return ValueTrackerResult();
// Otherwise, the value is available in the v0.sub0.
- SrcReg = ExtractSubregInputReg.Reg;
- SrcSubReg = ExtractSubregInputReg.SubIdx;
- return true;
+ return ValueTrackerResult(ExtractSubregInputReg.Reg,
+ ExtractSubregInputReg.SubIdx);
}
-bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
assert(Def->isSubregToReg() && "Invalid definition");
// We are looking at:
// Def = SUBREG_TO_REG Imm, v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// If DefSubReg != sub0, we would have to check that all the bits
// we track are included in sub0 and if yes, we would have to
// determine the right subreg in v0.
if (DefSubReg != Def->getOperand(3).getImm())
- return false;
- // Bails if we have to compose sub registers.
+ return ValueTrackerResult();
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose it with sub0.
if (Def->getOperand(2).getSubReg())
- return false;
+ return ValueTrackerResult();
- SrcReg = Def->getOperand(2).getReg();
- SrcSubReg = Def->getOperand(3).getImm();
- return true;
+ return ValueTrackerResult(Def->getOperand(2).getReg(),
+ Def->getOperand(3).getImm());
+}
+
+/// \brief Explore each PHI incoming operand and return its sources.
+ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
+ assert(Def->isPHI() && "Invalid definition");
+ ValueTrackerResult Res;
+
+ // If we look for a different subreg, bail as we do not support composing
+ // subregs yet.
+ if (Def->getOperand(0).getSubReg() != DefSubReg)
+ return ValueTrackerResult();
+
+ // Return all register sources for PHI instructions.
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
+ auto &MO = Def->getOperand(i);
+ assert(MO.isReg() && "Invalid PHI instruction");
+ Res.addSource(MO.getReg(), MO.getSubReg());
+ }
+
+ return Res;
}
-bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceImpl() {
assert(Def && "This method needs a valid definition");
assert(
(DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
if (Def->isCopy())
- return getNextSourceFromCopy(SrcReg, SrcSubReg);
+ return getNextSourceFromCopy();
if (Def->isBitcast())
- return getNextSourceFromBitcast(SrcReg, SrcSubReg);
+ return getNextSourceFromBitcast();
// All the remaining cases involve "complex" instructions.
- // Bails if we did not ask for the advanced tracking.
+ // Bail if we did not ask for the advanced tracking.
if (!UseAdvancedTracking)
- return false;
+ return ValueTrackerResult();
if (Def->isRegSequence() || Def->isRegSequenceLike())
- return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
+ return getNextSourceFromRegSequence();
if (Def->isInsertSubreg() || Def->isInsertSubregLike())
- return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
+ return getNextSourceFromInsertSubreg();
if (Def->isExtractSubreg() || Def->isExtractSubregLike())
- return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
+ return getNextSourceFromExtractSubreg();
if (Def->isSubregToReg())
- return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
- return false;
+ return getNextSourceFromSubregToReg();
+ if (Def->isPHI())
+ return getNextSourceFromPHI();
+ return ValueTrackerResult();
}
-const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSource() {
// If we reach a point where we cannot move up in the use-def chain,
// there is nothing we can get.
if (!Def)
- return nullptr;
+ return ValueTrackerResult();
- const MachineInstr *PrevDef = nullptr;
- // Try to find the next source.
- if (getNextSourceImpl(SrcReg, SrcSubReg)) {
+ ValueTrackerResult Res = getNextSourceImpl();
+ if (Res.isValid()) {
// Update definition, definition index, and subregister for the
// next call of getNextSource.
// Update the current register.
- Reg = SrcReg;
- // Update the return value before moving up in the use-def chain.
- PrevDef = Def;
+ bool OneRegSrc = Res.getNumSources() == 1;
+ if (OneRegSrc)
+ Reg = Res.getSrcReg(0);
+ // Update the result before moving up in the use-def chain
+ // with the instruction containing the last found sources.
+ Res.setInst(Def);
+
// If we can still move up in the use-def chain, move to the next
- // defintion.
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
- DefSubReg = SrcSubReg;
- return PrevDef;
+ DefSubReg = Res.getSrcSubReg(0);
+ return Res;
}
}
// If we end up here, this means we will not be able to find another source
- // for the next iteration.
- // Make sure any new call to getNextSource bails out early by cutting the
- // use-def chain.
+ // for the next iteration. Make sure any new call to getNextSource bails out
+ // early by cutting the use-def chain.
Def = nullptr;
- return PrevDef;
+ return Res;
}
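Taken together, the ValueTracker changes turn it into a pull-style iterator over the use-def chain. A small sketch (not in the patch, names hypothetical) of driving it, matching the inner loop of findNextSource above:

static void walkUseDefChain(unsigned Reg, unsigned SubReg,
                            const MachineRegisterInfo &MRI,
                            const TargetInstrInfo *TII) {
  ValueTracker Tracker(Reg, SubReg, MRI, /*UseAdvancedTracking=*/true, TII);
  while (true) {
    ValueTrackerResult Res = Tracker.getNextSource();
    // Invalid result: top of the chain, or an instruction we cannot look
    // through.
    if (!Res.isValid())
      break;
    // With a single virtual-register source the tracker has already advanced
    // to the next definition; with several sources (a PHI) it stops, so the
    // caller has to fan out over Res manually, as findNextSource does.
    if (Res.getNumSources() > 1)
      break;
  }
}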
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 6f76116..b95dffd 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -87,7 +87,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -196,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList(
const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
const InstrItineraryData *InstrItins =
MF.getSubtarget().getInstrItineraryData();
@@ -267,7 +267,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
RegClassInfo.runOnMachineFunction(Fn);
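The hunks above migrate PostRAScheduler from the legacy AliasAnalysis pass to AAResultsWrapperPass. For reference, a minimal sketch (not part of the patch; the pass name is hypothetical) of the updated pattern for any MachineFunctionPass:

struct ExampleAAUserPass : public MachineFunctionPass {
  static char ID;
  ExampleAAUserPass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>(); // was AU.addRequired<AliasAnalysis>()
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // query with AA->alias(...), AA->getModRefInfo(...), etc.
    return false;
  }
};
char ExampleAAUserPass::ID = 0;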
@@ -302,8 +302,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
CriticalPathRCs);
// Loop over all of the basic blocks
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
+ for (auto &MBB : Fn) {
#ifndef NDEBUG
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
if (DebugDiv > 0) {
@@ -311,25 +310,25 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (bbcnt++ % DebugDiv != DebugMod)
continue;
dbgs() << "*** DEBUG scheduling " << Fn.getName()
- << ":BB#" << MBB->getNumber() << " ***\n";
+ << ":BB#" << MBB.getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.startBlock(MBB);
+ Scheduler.startBlock(&MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
- MachineBasicBlock::iterator Current = MBB->end();
- unsigned Count = MBB->size(), CurrentCount = Count;
- for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineBasicBlock::iterator Current = MBB.end();
+ unsigned Count = MBB.size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
MachineInstr *MI = std::prev(I);
--Count;
// Calls are not scheduling boundaries before register allocation, but
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
- if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+ Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
@@ -343,9 +342,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
- assert((MBB->begin() == Current || CurrentCount != 0) &&
+ assert((MBB.begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
@@ -355,7 +354,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.finishBlock();
// Update register kills
- Scheduler.fixupKills(MBB);
+ Scheduler.fixupKills(&MBB);
}
return true;
@@ -400,8 +399,12 @@ void SchedulePostRATDList::schedule() {
}
DEBUG(dbgs() << "********** List Scheduling **********\n");
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ dbgs() << '\n';
+ }
+ );
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
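
A pattern that repeats in this file and in several of the register-allocator files below is the migration from the old AliasAnalysis analysis group to the AAResultsWrapperPass of the newer alias-analysis infrastructure. A minimal sketch of that idiom, assuming a hypothetical machine pass named ExamplePass (only the two AAResultsWrapperPass calls mirror the patch; the rest is illustrative):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
// Hypothetical pass used only to illustrate the wrapper-pass idiom.
struct ExamplePass : public llvm::MachineFunctionPass {
  static char ID;
  ExamplePass() : llvm::MachineFunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<llvm::AAResultsWrapperPass>();   // was: AliasAnalysis
    llvm::MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    // The wrapper hands out the aggregated AAResults object.
    llvm::AliasAnalysis *AA =
        &getAnalysis<llvm::AAResultsWrapperPass>().getAAResults();
    (void)AA; // used for memory-dependence queries by the schedulers above
    return false;
  }
};
char ExamplePass::ID = 0;
} // end anonymous namespace
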
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 5f81949..d27ea2f 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -58,7 +58,7 @@ INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -96,7 +96,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// This is a physreg implicit-def.
// Look for the first instruction to use or define an alias.
- MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
bool Found = false;
for (++UserMI; UserMI != UserE; ++UserMI) {
@@ -151,7 +151,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
if (MBBI->isImplicitDef())
- WorkList.insert(MBBI);
+ WorkList.insert(&*MBBI);
if (WorkList.empty())
continue;
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 6ca69a1..939c500 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -71,8 +71,9 @@ private:
// stack frame indexes.
unsigned MinCSFrameIndex, MaxCSFrameIndex;
- // Save and Restore blocks of the current function.
- MachineBasicBlock *SaveBlock;
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ SmallVector<MachineBasicBlock *, 1> SaveBlocks;
SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
// Flag to control whether to use the register scavenger to resolve
@@ -91,9 +92,6 @@ private:
int &SPAdj);
void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
-
- // Convenience for recognizing return blocks.
- bool isReturnBlock(const MachineBasicBlock *MBB) const;
};
} // namespace
@@ -128,10 +126,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const {
- return (MBB && !MBB->empty() && MBB->back().isReturn());
-}
-
/// Compute the set of return blocks
void PEI::calculateSets(MachineFunction &Fn) {
const MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -142,25 +136,25 @@ void PEI::calculateSets(MachineFunction &Fn) {
// Use the points found by shrink-wrapping, if any.
if (MFI->getSavePoint()) {
- SaveBlock = MFI->getSavePoint();
+ SaveBlocks.push_back(MFI->getSavePoint());
assert(MFI->getRestorePoint() && "Both restore and save must be set");
MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
// If RestoreBlock does not have any successor and is not a return block
// then the end point is unreachable and we do not need to insert any
// epilogue.
- if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock))
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
RestoreBlocks.push_back(RestoreBlock);
return;
}
// Save refs to entry and return blocks.
- SaveBlock = Fn.begin();
- for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
- MBB != E; ++MBB)
- if (isReturnBlock(MBB))
- RestoreBlocks.push_back(MBB);
-
- return;
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
}
/// StackObjSet - A set of stack object indexes
@@ -195,7 +189,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// place all spills in the entry block, all restores in return blocks.
calculateSets(Fn);
- // Add the code to save and restore the callee saved registers
+ // Add the code to save and restore the callee saved registers.
if (!F->hasFnAttribute(Attribute::Naked))
insertCSRSpillsAndRestores(Fn);
@@ -237,6 +231,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
}
delete RS;
+ SaveBlocks.clear();
RestoreBlocks.clear();
return true;
}
@@ -407,7 +402,7 @@ static void updateLiveness(MachineFunction &MF) {
const MachineBasicBlock *CurBB = WorkList.pop_back_val();
// By construction, the region that is after the save point is
// dominated by the Save and post-dominated by the Restore.
- if (CurBB == Save)
+ if (CurBB == Save && Save != Restore)
continue;
// Enqueue all the successors not already visited.
// Those are by construction either before Save or after Restore.
@@ -419,10 +414,13 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- for (MachineBasicBlock *MBB : Visited)
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = CSI[i].getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
- MBB->addLiveIn(CSI[i].getReg());
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
}
}
@@ -446,18 +444,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MachineBasicBlock::iterator I;
// Spill using target interface.
- I = SaveBlock->begin();
- if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Insert the spill to the stack frame.
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
- RC, TRI);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
}
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
}
- // Update the live-in information of all the blocks up to the save point.
- updateLiveness(Fn);
// Restore using target interface.
for (MachineBasicBlock *MBB : RestoreBlocks) {
@@ -500,7 +500,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
static inline void
AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign) {
+ unsigned &MaxAlign, unsigned Skew) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI->getObjectSize(FrameIdx);
@@ -512,7 +512,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
@@ -530,12 +530,12 @@ static void
AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo *MFI, bool StackGrowsDown,
- int64_t &Offset, unsigned &MaxAlign) {
+ int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
E = UnassignedObjs.end(); I != E; ++I) {
int i = *I;
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
ProtectedObjs.insert(i);
}
}
@@ -563,6 +563,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
&& "Local area offset should be in direction of stack growth");
int64_t Offset = LocalAreaOffset;
+ // Skew to be applied to alignment.
+ unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// We currently don't support filling in holes in between fixed sized
@@ -593,7 +596,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
@@ -602,7 +605,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
MFI->setObjectOffset(i, Offset);
Offset += MFI->getObjectSize(i);
@@ -624,7 +627,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
RS->getScavengingFrameIndices(SFIs);
for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -635,7 +638,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -662,7 +665,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
StackObjSet AddrOfObjs;
AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
@@ -695,11 +698,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
}
// Then assign frame offsets to stack objects that are not used to spill
@@ -719,7 +722,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (ProtectedObjs.count(i))
continue;
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
}
// Make sure the special register scavenging spill slot is closest to the
@@ -729,7 +732,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
RS->getScavengingFrameIndices(SFIs);
for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -754,7 +757,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- Offset = RoundUpToAlignment(Offset, StackAlign);
+ Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
}
// Update frame info to pretend that this is part of the stack...
@@ -771,18 +774,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Add prologue to the function...
- TFI.emitPrologue(Fn, *SaveBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(Fn, *SaveBlock);
// Add epilogue to restore the callee-save registers in each exiting block.
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
TFI.emitEpilogue(Fn, *RestoreBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(Fn, *SaveBlock);
+
// Emit additional code that is required to support segmented stacks, if
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (Fn.shouldSplitStack())
- TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ if (Fn.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ }
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
@@ -790,7 +799,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// different conditional check and another BIF for allocating more stack
// space.
if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
- TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -800,25 +810,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
if (!TFI.needsFrameIndexResolution(Fn)) return;
- MachineModuleInfo &MMI = Fn.getMMI();
- const Function *F = Fn.getFunction();
- const Function *ParentF = MMI.getWinEHParent(F);
- unsigned FrameReg;
- if (F == ParentF) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
- // FIXME: This should be unconditional but we have bugs in the preparation
- // pass.
- if (FuncInfo.UnwindHelpFrameIdx != INT_MAX)
- FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP(
- Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg);
- } else if (MMI.hasWinEHFuncInfo(F)) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
- auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F);
- if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end())
- FuncInfo.CatchHandlerParentFrameObjOffset[F] =
- TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg);
- }
-
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
@@ -841,12 +832,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
}
// Handle the unreachable blocks.
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
+ for (auto &BB : Fn) {
+ if (Reachable.count(&BB))
// Already handled in DFS traversal.
continue;
int SPAdj = 0;
- replaceFrameIndices(BB, Fn, SPAdj);
+ replaceFrameIndices(&BB, Fn, SPAdj);
}
}
@@ -889,11 +880,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (!MI->getOperand(i).isFI())
continue;
- // Frame indicies in debug values are encoded in a target independent
+ // Frame indices in debug values are encoded in a target independent
// way with simply the frame index and offset rather than any
// target-specific addressing mode.
if (MI->isDebugValue()) {
- assert(i == 0 && "Frame indicies can only appear as the first "
+ assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
MachineOperand &Offset = MI->getOperand(1);
@@ -979,7 +970,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
- RS->enterBasicBlock(BB);
+ RS->enterBasicBlock(&*BB);
int SPAdj = 0;
@@ -1026,12 +1017,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
- // Make sure MRI now accounts this register as used.
- MRI.setPhysRegUsed(ScratchReg);
-
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
@@ -1044,7 +1031,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// problem because we need the spill code before I: Move I to just
// prior to J.
if (I != std::prev(J)) {
- BB->splice(J, BB, I);
+ BB->splice(J, &*BB, I);
// Before we move I, we need to prepare the RS to visit I again.
// Specifically, RS will assert if it sees uses of registers that
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index b1c341d..1f46417 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -22,87 +23,38 @@
#include <map>
using namespace llvm;
-namespace {
-struct PSVGlobalsTy {
- // PseudoSourceValues are immutable so don't need locking.
- const PseudoSourceValue PSVs[4];
- sys::Mutex Lock; // Guards FSValues, but not the values inside it.
- std::map<int, const PseudoSourceValue *> FSValues;
-
- PSVGlobalsTy() : PSVs() {}
- ~PSVGlobalsTy() {
- for (std::map<int, const PseudoSourceValue *>::iterator
- I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
- delete I->second;
- }
- }
-};
-
-static ManagedStatic<PSVGlobalsTy> PSVGlobals;
-
-} // anonymous namespace
-
-const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &PSVGlobals->PSVs[0]; }
-const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &PSVGlobals->PSVs[1]; }
-const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &PSVGlobals->PSVs[2]; }
-const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &PSVGlobals->PSVs[3]; }
-
static const char *const PSVNames[] = {
- "Stack",
- "GOT",
- "JumpTable",
- "ConstantPool"
-};
+ "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+ "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[this - PSVGlobals->PSVs];
-}
-
-const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
- PSVGlobalsTy &PG = *PSVGlobals;
- sys::ScopedLock locked(PG.Lock);
- const PseudoSourceValue *&V = PG.FSValues[FI];
- if (!V)
- V = new FixedStackPseudoSourceValue(FI);
- return V;
+ O << PSVNames[Kind];
}
bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
- if (this == getStack())
+ if (isStack())
return false;
- if (this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
+ if (isGOT() || isConstantPool() || isJumpTable())
return true;
llvm_unreachable("Unknown PseudoSourceValue!");
}
-bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
- if (this == getStack() ||
- this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ if (isStack() || isGOT() || isConstantPool() || isJumpTable())
return false;
llvm_unreachable("Unknown PseudoSourceValue!");
}
-bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
- if (this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
- return false;
- return true;
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return !(isGOT() || isConstantPool() || isJumpTable());
}
-bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+bool FixedStackPseudoSourceValue::isConstant(
+ const MachineFrameInfo *MFI) const {
return MFI && MFI->isImmutableObjectIndex(FI);
}
@@ -122,3 +74,69 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
+ : PseudoSourceValue(Kind) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+ const GlobalValue *GV)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
+
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager()
+ : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT),
+ JumpTablePSV(PseudoSourceValue::JumpTable),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool) {}
+
+const PseudoSourceValue *PseudoSourceValueManager::getStack() {
+ return &StackPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; }
+
+const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() {
+ return &ConstantPoolPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
+ return &JumpTablePSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) {
+ std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+ if (!V)
+ V = llvm::make_unique<FixedStackPseudoSourceValue>(FI);
+ return V.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
+ std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
+ GlobalCallEntries[GV];
+ if (!E)
+ E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV);
+ return E.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
+ std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
+ ExternalCallEntries[ES];
+ if (!E)
+ E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES);
+ return E.get();
+}
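
The PseudoSourceValue rework above replaces the ManagedStatic map of heap-allocated values with per-manager caches of std::unique_ptr keyed by frame index, global value, or external symbol. A reduced sketch of that caching pattern, using stand-in types (FixedStackPSV and Manager are illustrative, not the real class names):

#include <map>
#include <memory>

struct FixedStackPSV {
  explicit FixedStackPSV(int FI) : FI(FI) {}
  int FI;
};

class Manager {
  std::map<int, std::unique_ptr<FixedStackPSV>> FSValues;

public:
  const FixedStackPSV *getFixedStack(int FI) {
    std::unique_ptr<FixedStackPSV> &V = FSValues[FI]; // default-constructs null
    if (!V)
      V = std::make_unique<FixedStackPSV>(FI);        // create on first request
    return V.get();                                    // stable pointer identity
  }
};
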
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 0090332..cfe367d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -133,8 +133,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
@@ -223,7 +223,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVector<unsigned, 8> PhysRegSpillCands;
// Check for an available register in this class.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
while (unsigned PhysReg = Order.next()) {
// Check for interference in PhysReg
switch (Matrix->checkInterference(VirtReg, PhysReg)) {
@@ -276,7 +276,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
- calculateSpillWeightsAndHints(*LIS, *MF,
+ calculateSpillWeightsAndHints(*LIS, *MF, VRM,
getAnalysis<MachineLoopInfo>(),
getAnalysis<MachineBlockFrequencyInfo>());
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fd3d4d7..f4c076f 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -799,10 +799,9 @@ void RAFast::AllocateBasicBlock() {
MachineBasicBlock::iterator MII = MBB->begin();
// Add live-in registers as live.
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- if (MRI->isAllocatable(*I))
- definePhysReg(MII, *I, regReserved);
+ for (const auto &LI : MBB->liveins())
+ if (MRI->isAllocatable(LI.PhysReg))
+ definePhysReg(MII, LI.PhysReg, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
@@ -986,10 +985,6 @@ void RAFast::AllocateBasicBlock() {
}
}
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@@ -1050,10 +1045,6 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
Coalesced.push_back(MI);
@@ -1103,12 +1094,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Add the clobber lists for all the instructions we skipped earlier.
- for (const MCInstrDesc *Desc : SkippedInstrs)
- if (const uint16_t *Defs = Desc->getImplicitDefs())
- while (*Defs)
- MRI->setPhysRegUsed(*Defs++);
-
// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 7ebcf7f..945cb9e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -86,6 +86,14 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+static cl::opt<bool> EnableDeferredSpilling(
+ "enable-deferred-spilling", cl::Hidden,
+ cl::desc("Instead of spilling a variable right away, defer the actual "
+ "code insertion to the end of the allocation. That way the "
+ "allocator might still find a suitable coloring for this "
+ "variable because of other evicted variables."),
+ cl::init(false));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass,
/// Live range will be spilled. No more splitting will be attempted.
RS_Spill,
+
+    /// Live range is in memory. Because of other evictions, it might still get
+    /// moved into a register in the end.
+ RS_Memory,
+
/// There is nothing more we can do to this live range. Abort compilation
/// if it can't be assigned.
RS_Done
@@ -414,6 +427,7 @@ const char *const RAGreedy::StageName[] = {
"RS_Split",
"RS_Split2",
"RS_Spill",
+ "RS_Memory",
"RS_Done"
};
#endif
@@ -447,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineBlockFrequencyInfo>();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
@@ -536,6 +550,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
Prio = Size;
+ } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+      // Memory operands should be considered last.
+      // Change the priority such that memory operands are assigned in
+      // the reverse order that they came in.
+ // TODO: Make this a member variable and probably do something about hints.
+ static unsigned MemOp = 0;
+ Prio = MemOp++;
} else {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
@@ -637,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
//===----------------------------------------------------------------------===//
unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
unsigned PhysReg;
while ((PhysReg = Order.next())) {
if (PhysReg == PrevReg)
@@ -2450,7 +2471,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
unsigned Depth) {
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
@@ -2512,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
// Finally spill VirtReg itself.
- NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
- spiller().spill(LRE);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+ if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
+ // TODO: This is experimental and in particular, we do not model
+ // the live range splitting done by spilling correctly.
+ // We would need a deep integration with the spiller to do the
+ // right thing here. Anyway, that is still good for early testing.
+ setStage(VirtReg, RS_Memory);
+ DEBUG(dbgs() << "Do as if this register is in memory\n");
+ NewVRegs.push_back(VirtReg.reg);
+ } else {
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
- if (VerifyEnabled)
- MF->verify(this, "After spilling");
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+ }
// The live virtual register requesting allocation was spilled, so tell
// the caller not to allocate anything during this round.
@@ -2555,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
- calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
+ calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
DEBUG(LIS->dump());
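
The enqueue change above gives RS_Memory ranges a small, monotonically increasing priority so that deferred ranges are retried only after everything else, most recently deferred first. A self-contained illustration of that ordering with a plain std::priority_queue (the letters and priorities are made up):

#include <cstdio>
#include <queue>
#include <utility>

int main() {
  std::priority_queue<std::pair<unsigned, char>> Q; // pops largest first
  unsigned MemOp = 0;
  Q.push({1000, 'R'});    // a regular live range with a size-based priority
  Q.push({MemOp++, 'a'}); // deferred ranges a, b, c in arrival order
  Q.push({MemOp++, 'b'});
  Q.push({MemOp++, 'c'});
  while (!Q.empty()) {    // prints: R c b a
    std::printf("%c ", Q.top().second);
    Q.pop();
  }
  return 0;
}
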
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index eeff73d..fd28b05 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -47,6 +47,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -497,8 +498,8 @@ void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesCFG();
- au.addRequired<AliasAnalysis>();
- au.addPreserved<AliasAnalysis>();
+ au.addRequired<AAResultsWrapperPass>();
+ au.addPreserved<AAResultsWrapperPass>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
@@ -724,11 +725,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
MachineBlockFrequencyInfo &MBFI =
getAnalysis<MachineBlockFrequencyInfo>();
- calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI,
- normalizePBQPSpillWeight);
-
VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+ calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
+ MBFI, normalizePBQPSpillWeight);
+
std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
MF.getRegInfo().freezeReservedRegs(MF);
@@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-namespace {
-// A helper class for printing node and register info in a consistent way
-class PrintNodeInfo {
-public:
- typedef PBQP::RegAlloc::PBQPRAGraph Graph;
- typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId;
-
- PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {}
-
- void print(raw_ostream &OS) const {
+/// Create Printable object for node and register info.
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
+ const PBQP::RegAlloc::PBQPRAGraph &G) {
+ return Printable([NId, &G](raw_ostream &OS) {
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
unsigned VReg = G.getNodeMetadata(NId).getVReg();
const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
- }
-
-private:
- const Graph &G;
- NodeId NId;
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) {
- PR.print(OS);
- return OS;
+ });
}
-} // anonymous namespace
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
for (auto NId : nodeIds()) {
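
The RegAllocPBQP change above swaps a hand-rolled printer class for llvm::Printable, which wraps a printing closure so it can be streamed directly into a raw_ostream. A small sketch of the same idiom (printHex is a made-up helper, not part of the patch):

#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Printable printHex(unsigned Value) {
  return llvm::Printable([Value](llvm::raw_ostream &OS) {
    OS << "0x";
    OS.write_hex(Value);
  });
}

// Usage: composes with other streamed output without building a temporary
// string, e.g. llvm::errs() << "mask is " << printHex(0x1f) << '\n';
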
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c911b9b..e7b3217 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -93,7 +92,7 @@ namespace {
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
- unsigned ShrinkMask;
+ LaneBitmask ShrinkMask;
/// True if the main range of the currently coalesced intervals should be
/// checked for smaller live intervals.
@@ -164,15 +163,13 @@ namespace {
/// LaneMask are split as necessary. @p LaneMask are the lanes that
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
- /// @returns false if live range conflicts couldn't get resolved.
- bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
- unsigned LaneMask, CoalescerPair &CP);
+ void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ LaneBitmask LaneMask, CoalescerPair &CP);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
- /// @returns false if live range conflicts couldn't get resolved.
- bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
- unsigned LaneMask, const CoalescerPair &CP);
+ void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask, const CoalescerPair &CP);
/// We found a non-trivially-coalescable copy. If the source value number is
/// defined by a copy from the destination reg see if we can merge these two
@@ -224,30 +221,17 @@ namespace {
/// Dst, we can drop \p Copy.
bool applyTerminalRule(const MachineInstr &Copy) const;
- /// Check whether or not \p LI is composed by multiple connected
- /// components and if that is the case, fix that.
- void splitNewRanges(LiveInterval *LI) {
- ConnectedVNInfoEqClasses ConEQ(*LIS);
- unsigned NumComps = ConEQ.Classify(LI);
- if (NumComps <= 1)
- return;
- SmallVector<LiveInterval*, 8> NewComps(1, LI);
- for (unsigned i = 1; i != NumComps; ++i) {
- unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
- NewComps.push_back(&LIS->createEmptyInterval(VReg));
- }
-
- ConEQ.Distribute(&NewComps[0], *MRI);
- }
-
/// Wrapper method for \see LiveIntervals::shrinkToUses.
/// This method does the proper fixing of the live-ranges when the afore
/// mentioned method returns true.
void shrinkToUses(LiveInterval *LI,
SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
- if (LIS->shrinkToUses(LI, Dead))
- // We may have created multiple connected components, split them.
- splitNewRanges(LI);
+ if (LIS->shrinkToUses(LI, Dead)) {
+ /// Check whether or not \p LI is composed by multiple connected
+ /// components and if that is the case, fix that.
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS->splitSeparateComponents(*LI, SplitLIs);
+ }
}
public:
@@ -275,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -453,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
@@ -679,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
return false;
- unsigned Op1, Op2, NewDstIdx;
- if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
- return false;
- if (Op1 == UseOpIdx)
- NewDstIdx = Op2;
- else if (Op2 == UseOpIdx)
- NewDstIdx = Op1;
- else
+
+ // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+  // commutable operand which is expressed by 'CommuteAnyOperandIndex' value
+ // passed to the method. That _other_ operand is chosen by
+ // the findCommutedOpIndices() method.
+ //
+ // That is obviously an area for improvement in case of instructions having
+ // more than 2 operands. For example, if some instruction has 3 commutable
+ // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
+ // op#2<->op#3) of commute transformation should be considered/tried here.
+ unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx))
return false;
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
@@ -719,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
- MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+ MachineInstr *NewMI =
+ TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return false;
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
@@ -804,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
if (IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
- unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
IntA.createSubRangeFrom(Allocator, Mask, IntA);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
@@ -812,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
- unsigned AMask = SA.LaneMask;
+ LaneBitmask AMask = SA.LaneMask;
for (LiveInterval::SubRange &SB : IntB.subranges()) {
- unsigned BMask = SB.LaneMask;
- unsigned Common = BMask & AMask;
+ LaneBitmask BMask = SB.LaneMask;
+ LaneBitmask Common = BMask & AMask;
if (Common == 0)
continue;
- DEBUG(
- dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common));
- unsigned BRest = BMask & ~AMask;
+      DEBUG( dbgs() << "\t\tCopy+Merge " << PrintLaneMask(BMask)
+ << " into " << PrintLaneMask(Common) << '\n');
+ LaneBitmask BRest = BMask & ~AMask;
LiveInterval::SubRange *CommonRange;
if (BRest != 0) {
SB.LaneMask = BRest;
- DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest));
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
+ << '\n');
// Duplicate SubRange for newly merged common stuff.
CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
} else {
@@ -842,7 +832,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
AMask &= ~BMask;
}
if (AMask != 0) {
- DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask));
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
@@ -1107,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
// CopyMI is undef iff SrcReg is not live before the instruction.
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
- unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+ LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
if ((SR.LaneMask & SrcMask) == 0)
continue;
@@ -1128,7 +1118,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
DstLI.MergeValueNumberInto(VNI, PrevVNI);
// The affected subregister segments can be removed.
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
for (LiveInterval::SubRange &SR : DstLI.subranges()) {
if ((SR.LaneMask & DstMask) == 0)
continue;
@@ -1147,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
continue;
const MachineInstr &MI = *MO.getParent();
SlotIndex UseIdx = LIS->getInstructionIndex(&MI);
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
bool isLive;
if (UseMask != ~0u && DstLI.hasSubRanges()) {
isLive = false;
@@ -1213,10 +1203,10 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
}
- unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx);
bool IsUndef = true;
SlotIndex MIIdx = UseMI->isDebugValue()
? LIS->getSlotIndexes()->getIndexBefore(UseMI)
@@ -1445,8 +1435,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & ShrinkMask) == 0)
continue;
- DEBUG(dbgs() << "Shrink LaneUses (Lane "
- << format("%04X", S.LaneMask) << ")\n");
+ DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
+ << ")\n");
LIS->shrinkToUses(S, LI.reg);
}
LI.removeEmptySubRanges();
@@ -1644,7 +1634,7 @@ class JoinVals {
const unsigned SubIdx;
/// The LaneMask that this liverange will occupy the coalesced register. May
/// be smaller than the lanemask produced by SubIdx when merging subranges.
- const unsigned LaneMask;
+ const LaneBitmask LaneMask;
/// This is true when joining sub register ranges, false when joining main
/// ranges.
@@ -1699,11 +1689,11 @@ class JoinVals {
ConflictResolution Resolution;
/// Lanes written by this def, 0 for unanalyzed values.
- unsigned WriteLanes;
+ LaneBitmask WriteLanes;
/// Lanes with defined values in this register. Other lanes are undef and
/// safe to clobber.
- unsigned ValidLanes;
+ LaneBitmask ValidLanes;
/// Value in LI being redefined by this def.
VNInfo *RedefVNI;
@@ -1744,7 +1734,7 @@ class JoinVals {
/// Compute the bitmask of lanes actually written by DefMI.
/// Set Redef if there are any partial register definitions that depend on the
/// previous value of the register.
- unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
/// Find the ultimate value that VNI was copied from.
std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
@@ -1780,12 +1770,12 @@ class JoinVals {
/// entry to TaintedVals.
///
/// Returns false if the tainted lanes extend beyond the basic block.
- bool taintExtent(unsigned, unsigned, JoinVals&,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool taintExtent(unsigned, LaneBitmask, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&);
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+ bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const;
/// Determine if ValNo is a copy of a value number in LR or Other.LR that will
/// be pruned:
@@ -1796,7 +1786,7 @@ class JoinVals {
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
- JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+ JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
bool TrackSubRegLiveness)
@@ -1822,8 +1812,8 @@ public:
/// Removes subranges starting at copies that get removed. This sometimes
/// happens when undefined subranges are copied around. These ranges contain
- /// no usefull information and can be removed.
- void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
+ /// no useful information and can be removed.
+ void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
@@ -1840,9 +1830,9 @@ public:
};
} // end anonymous namespace
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
- unsigned L = 0;
+ LaneBitmask L = 0;
for (const MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
continue;
@@ -1879,7 +1869,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
ValueIn = nullptr;
for (const LiveInterval::SubRange &S : LI.subranges()) {
// Transform lanemask to a mask in the joined live interval.
- unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+ LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
if ((SMask & LaneMask) == 0)
continue;
LiveQueryResult LRQ = S.Query(Def);
@@ -1928,7 +1918,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
V.ValidLanes = V.WriteLanes = Lanes;
} else {
DefMI = Indexes->getInstructionFromIndex(VNI->def);
@@ -2190,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) {
}
bool JoinVals::
-taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) {
VNInfo *VNI = LR.getValNumInfo(ValNo);
MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
@@ -2230,7 +2220,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
}
bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
- unsigned Lanes) const {
+ LaneBitmask Lanes) const {
if (MI->isDebugValue())
return false;
for (const MachineOperand &MO : MI->operands()) {
@@ -2264,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
// VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
// join, those lanes will be tainted with a wrong value. Get the extent of
// the tainted lanes.
- unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
- SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
// Tainted lanes would extend beyond the basic block.
return false;
@@ -2384,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
{
// Look for values being erased.
bool DidPrune = false;
@@ -2401,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
// copied and we must remove that subrange value as well.
VNInfo *ValueOut = Q.valueOutOrDead();
if (ValueOut != nullptr && Q.valueIn() == nullptr) {
- DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask)
+ DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
<< " at " << Def << "\n");
LIS->pruneValue(S, Def, nullptr);
DidPrune = true;
@@ -2410,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
continue;
}
// If a subrange ends at the copy, then a value was copied but only
- // partially used later. Shrink the subregister range apropriately.
+ // partially used later. Shrink the subregister range appropriately.
if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
- DEBUG(dbgs() << "\t\tDead uses at sublane "
- << format("%04X", S.LaneMask) << " at " << Def << "\n");
+ DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
ShrinkMask |= S.LaneMask;
}
}
@@ -2477,8 +2467,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
-bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
- unsigned LaneMask,
+void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask,
const CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
@@ -2492,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
// ranges get mapped to the "overflow" lane mask bit which creates unexpected
// interferences.
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
- DEBUG(dbgs() << "*** Couldn't join subrange!\n");
- return false;
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
}
if (!LHSVals.resolveConflicts(RHSVals) ||
!RHSVals.resolveConflicts(LHSVals)) {
- DEBUG(dbgs() << "*** Couldn't join subrange!\n");
- return false;
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2521,36 +2513,37 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
if (EndPoints.empty())
- return true;
+ return;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LRange << '\n');
LIS->extendToIndices(LRange, EndPoints);
- return true;
}
-bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
- unsigned LaneMask, CoalescerPair &CP) {
+ LaneBitmask LaneMask,
+ CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &R : LI.subranges()) {
- unsigned RMask = R.LaneMask;
+ LaneBitmask RMask = R.LaneMask;
// LaneMask of subregisters common to subrange R and ToMerge.
- unsigned Common = RMask & LaneMask;
+ LaneBitmask Common = RMask & LaneMask;
// There is nothing to do without common subregs.
if (Common == 0)
continue;
- DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common));
+ DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
+ << PrintLaneMask(Common) << '\n');
// LaneMask of subregisters contained in the R range but not in ToMerge,
// they have to split into their own subrange.
- unsigned LRest = RMask & ~LaneMask;
+ LaneBitmask LRest = RMask & ~LaneMask;
LiveInterval::SubRange *CommonRange;
if (LRest != 0) {
R.LaneMask = LRest;
- DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest));
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
// Duplicate SubRange for newly merged common stuff.
CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
} else {
@@ -2559,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
CommonRange = &R;
}
LiveRange RangeCopy(ToMerge, Allocator);
- if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
- return false;
+ joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
LaneMask &= ~RMask;
}
if (LaneMask != 0) {
- DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
- return true;
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -2602,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// create initial subranges if necessary.
unsigned DstIdx = CP.getDstIdx();
if (!LHS.hasSubRanges()) {
- unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
- : TRI->getSubRegIndexLaneMask(DstIdx);
+ LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(DstIdx);
// LHS must support subregs or we wouldn't be in this codepath.
assert(Mask != 0);
LHS.createSubRangeFrom(Allocator, Mask, LHS);
} else if (DstIdx != 0) {
// Transform LHS lanemasks to new register class if necessary.
for (LiveInterval::SubRange &R : LHS.subranges()) {
- unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
R.LaneMask = Mask;
}
}
@@ -2619,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
- bool Abort = false;
if (!RHS.hasSubRanges()) {
- unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
- : TRI->getSubRegIndexLaneMask(SrcIdx);
- if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
- Abort = true;
+ LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(SrcIdx);
+ mergeSubRangeInto(LHS, RHS, Mask, CP);
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
- unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
- Abort = true;
- break;
- }
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
+ mergeSubRangeInto(LHS, R, Mask, CP);
}
}
- if (Abort) {
- // This shouldn't have happened :-(
- // However we are aware of at least one existing problem where we
- // can't merge subranges when multiple ranges end up in the
- // "overflow bit" 32. As a workaround we drop all subregister ranges
- // which means we loose some precision but are back to a well defined
- // state.
- assert(TargetRegisterInfo::isImpreciseLaneMask(
- CP.getNewRC()->getLaneMask())
- && "SubRange merge should only fail when merging into bit 32.");
- DEBUG(dbgs() << "\tSubrange join aborted!\n");
- LHS.clearSubRanges();
- RHS.clearSubRanges();
- } else {
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
- LHSVals.pruneSubRegValues(LHS, ShrinkMask);
- RHSVals.pruneSubRegValues(LHS, ShrinkMask);
- }
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2799,7 +2770,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
!isTerminalReg(DstReg, Copy, MRI))
return false;
- // DstReg is a terminal node. Check if it inteferes with any other
+ // DstReg is a terminal node. Check if it interferes with any other
// copy involving SrcReg.
const MachineBasicBlock *OrigBB = Copy.getParent();
const LiveInterval &DstLI = LIS->getInterval(DstReg);
@@ -2904,7 +2875,7 @@ void RegisterCoalescer::joinAllIntervals() {
std::vector<MBBPriorityInfo> MBBs;
MBBs.reserve(MF->size());
for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
JoinSplitEdges && isSplitEdge(MBB)));
}
@@ -2943,7 +2914,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
TRI = STI.getRegisterInfo();
TII = STI.getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Loops = &getAnalysis<MachineLoopInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
JoinGlobalCopies = STI.enableJoinGlobalCopies();
@@ -2981,22 +2952,25 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (MRI->recomputeRegClass(Reg)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ ++NumInflated;
+
LiveInterval &LI = LIS->getInterval(Reg);
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- if (MaxMask == 0) {
+ if (LI.hasSubRanges()) {
// If the inflated register class does not support subregisters anymore
// remove the subranges.
- LI.clearSubRanges();
- } else {
+ if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+ LI.clearSubRanges();
+ } else {
#ifndef NDEBUG
- // If subranges are still supported, then the same subregs should still
- // be supported.
- for (LiveInterval::SubRange &S : LI.subranges()) {
- assert ((S.LaneMask & ~MaxMask) == 0);
- }
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ // If subranges are still supported, then the same subregs
+ // should still be supported.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ assert((S.LaneMask & ~MaxMask) == 0);
+ }
#endif
+ }
}
- ++NumInflated;
}
}
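The RegisterCoalescer hunks above replace plain unsigned lane masks with the LaneBitmask typedef and switch the debug output from format("%04X", ...) to PrintLaneMask(). A minimal sketch of that printing pattern, assuming LaneBitmask and PrintLaneMask come from TargetRegisterInfo.h as in this release; dumpOverlap() is a hypothetical helper, not part of the patch:

#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Report which lanes of LI's subranges overlap a filter mask.
static void dumpOverlap(const LiveInterval &LI, LaneBitmask Filter) {
  for (const LiveInterval::SubRange &S : LI.subranges()) {
    LaneBitmask Common = S.LaneMask & Filter;
    if (Common == 0)
      continue;
    dbgs() << "subrange " << PrintLaneMask(S.LaneMask) << " overlaps "
           << PrintLaneMask(Common) << '\n';
  }
}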
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index c3786e5..8382b09 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -59,12 +59,12 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Max Pressure: ";
dumpRegSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
- for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
- dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " ";
+ for (unsigned Reg : LiveInRegs)
+ dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
dbgs() << '\n';
dbgs() << "Live Out: ";
- for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
- dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " ";
+ for (unsigned Reg : LiveOutRegs)
+ dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
dbgs() << '\n';
}
@@ -78,11 +78,13 @@ void RegPressureTracker::dump() const {
}
void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+ const char *sep = "";
for (const PressureChange &Change : *this) {
- if (!Change.isValid() || Change.getUnitInc() == 0)
- continue;
- dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet())
+ if (!Change.isValid())
+ break;
+ dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
<< " " << Change.getUnitInc();
+ sep = " ";
}
dbgs() << '\n';
}
@@ -90,8 +92,8 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]);
+ for (unsigned RegUnit : RegUnits) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
@@ -104,8 +106,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
/// Simply decrease the current pressure as impacted by these registers.
void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned I = 0, E = RegUnits.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I]));
+ for (unsigned RegUnit : RegUnits)
+ decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit));
}
/// Clear the result so it can be used for another round of pressure tracking.
@@ -157,10 +159,22 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
LiveInRegs.clear();
}
-const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const {
+void LiveRegSet::init(const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned NumRegUnits = TRI.getNumRegs();
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ Regs.setUniverse(NumRegUnits + NumVirtRegs);
+ this->NumRegUnits = NumRegUnits;
+}
+
+void LiveRegSet::clear() {
+ Regs.clear();
+}
+
+static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
- return &LIS->getInterval(Reg);
- return LIS->getCachedRegUnit(Reg);
+ return &LIS.getInterval(Reg);
+ return LIS.getCachedRegUnit(Reg);
}
void RegPressureTracker::reset() {
@@ -176,8 +190,7 @@ void RegPressureTracker::reset() {
else
static_cast<RegionPressure&>(P).reset();
- LiveRegs.PhysRegs.clear();
- LiveRegs.VirtRegs.clear();
+ LiveRegs.clear();
UntiedDefs.clear();
}
@@ -210,8 +223,7 @@ void RegPressureTracker::init(const MachineFunction *mf,
P.MaxSetPressure = CurrSetPressure;
- LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
- LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
+ LiveRegs.init(*MRI);
if (TrackUntiedDefs)
UntiedDefs.setUniverse(MRI->getNumVirtRegs());
}
@@ -250,14 +262,8 @@ void RegPressureTracker::closeTop() {
static_cast<RegionPressure&>(P).TopPos = CurrPos;
assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
- P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
- P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
- for (SparseSet<unsigned>::const_iterator I =
- LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
- P.LiveInRegs.push_back(*I);
- std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
- P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
- P.LiveInRegs.end());
+ P.LiveInRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveInRegs);
}
/// Set the boundary for the bottom of the region and summarize live outs.
@@ -268,21 +274,14 @@ void RegPressureTracker::closeBottom() {
static_cast<RegionPressure&>(P).BottomPos = CurrPos;
assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
- P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
- P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
- for (SparseSet<unsigned>::const_iterator I =
- LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
- P.LiveOutRegs.push_back(*I);
- std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
- P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
- P.LiveOutRegs.end());
+ P.LiveOutRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveOutRegs);
}
/// Finalize the region boundaries and record live ins and live outs.
void RegPressureTracker::closeRegion() {
if (!isTopClosed() && !isBottomClosed()) {
- assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
- "no region boundary");
+ assert(LiveRegs.size() == 0 && "no region boundary");
return;
}
if (!isBottomClosed())
@@ -299,8 +298,7 @@ void RegPressureTracker::closeRegion() {
void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
assert(isBottomClosed() && "need bottom-up tracking to initialize.");
- for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) {
- unsigned Reg = P.LiveOutRegs[i];
+ for (unsigned Reg : P.LiveOutRegs) {
if (TargetRegisterInfo::isVirtualRegister(Reg)
&& !RPTracker.hasUntiedDef(Reg)) {
increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg));
@@ -315,71 +313,113 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
}
namespace {
-/// Collect this instruction's unique uses and defs into SmallVectors for
-/// processing defs and uses in order.
-///
-/// FIXME: always ignore tied opers
-class RegisterOperands {
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- bool IgnoreDead;
+/// List of register defined and used by a machine instruction.
+class RegisterOperands {
public:
SmallVector<unsigned, 8> Uses;
SmallVector<unsigned, 8> Defs;
SmallVector<unsigned, 8> DeadDefs;
- RegisterOperands(const TargetRegisterInfo *tri,
- const MachineRegisterInfo *mri, bool ID = false):
- TRI(tri), MRI(mri), IgnoreDead(ID) {}
+ void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, bool IgnoreDead = false);
+
+ /// Use liveness information to find dead defs not marked with a dead flag
+ /// and move them to the DeadDefs vector.
+ void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS);
+};
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
+class RegisterOperandsCollector {
+ RegisterOperands &RegOpers;
+ const TargetRegisterInfo &TRI;
+ const MachineRegisterInfo &MRI;
+ bool IgnoreDead;
+
+ RegisterOperandsCollector(RegisterOperands &RegOpers,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool IgnoreDead)
+ : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {}
+
+ void collectInstr(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI)
+ collectOperand(*OperI);
+
+ // Remove redundant physreg dead defs.
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+ }
- /// Push this operand's register onto the correct vector.
- void collect(const MachineOperand &MO) {
+ /// Push this operand's register onto the correct vectors.
+ void collectOperand(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
+ unsigned Reg = MO.getReg();
if (MO.readsReg())
- pushRegUnits(MO.getReg(), Uses);
+ pushRegUnits(Reg, RegOpers.Uses);
if (MO.isDef()) {
if (MO.isDead()) {
if (!IgnoreDead)
- pushRegUnits(MO.getReg(), DeadDefs);
- }
- else
- pushRegUnits(MO.getReg(), Defs);
+ pushRegUnits(Reg, RegOpers.DeadDefs);
+ } else
+ pushRegUnits(Reg, RegOpers.Defs);
}
}
-protected:
- void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
if (containsReg(RegUnits, Reg))
return;
RegUnits.push_back(Reg);
- }
- else if (MRI->isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
if (containsReg(RegUnits, *Units))
continue;
RegUnits.push_back(*Units);
}
}
}
-};
-} // namespace
-/// Collect physical and virtual register operands.
-static void collectOperands(const MachineInstr *MI,
- RegisterOperands &RegOpers) {
- for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
- RegOpers.collect(*OperI);
+ friend class RegisterOperands;
+};
- // Remove redundant physreg dead defs.
- SmallVectorImpl<unsigned>::iterator I =
- std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
- std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
- RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+void RegisterOperands::collect(const MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool IgnoreDead) {
+ RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+ Collector.collectInstr(MI);
+}
+
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ SlotIndex SlotIdx = LIS.getInstructionIndex(&MI);
+ for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin();
+ RI != Defs.end(); /*empty*/) {
+ unsigned Reg = *RI;
+ const LiveRange *LR = getLiveRange(LIS, Reg);
+ if (LR != nullptr) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (LRQ.isDeadDef()) {
+ // LiveIntervals knows this is a dead def even though its MachineOperand is
+ // not flagged as such.
+ DeadDefs.push_back(Reg);
+ RI = Defs.erase(RI);
+ continue;
+ }
+ }
+ ++RI;
+ }
}
+} // namespace
+
/// Initialize an array of N PressureDiffs.
void PressureDiffs::init(unsigned N) {
Size = N;
@@ -399,7 +439,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
// Find an existing entry in the pressure diff for this PSet.
- PressureDiff::iterator I = begin(), E = end();
+ PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
for (; I != E && I->isValid(); ++I) {
if (I->getPSet() >= *PSetI)
break;
@@ -411,10 +451,20 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
if (!I->isValid() || I->getPSet() != *PSetI) {
PressureChange PTmp = PressureChange(*PSetI);
for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
- std::swap(*J,PTmp);
+ std::swap(*J, PTmp);
}
// Update the units for this pressure set.
- I->setUnitInc(I->getUnitInc() + Weight);
+ unsigned NewUnitInc = I->getUnitInc() + Weight;
+ if (NewUnitInc != 0) {
+ I->setUnitInc(NewUnitInc);
+ } else {
+ // Remove entry
+ PressureDiff::iterator J;
+ for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
+ *I = *J;
+ if (J != E)
+ *I = *J;
+ }
}
}
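The new else branch above compacts the fixed-size PressureDiff array when an entry's accumulated increment cancels to zero: the remaining valid entries are shifted left by one slot. A toy sketch of that shift on a plain array (no PressureChange type; removeAt() is only illustrative):

#include <cstddef>

// Remove the element at Pos by shifting the valid tail left one slot;
// NumValid counts the valid entries in a fixed-capacity array.
static void removeAt(int Vals[], size_t &NumValid, size_t Pos) {
  for (size_t J = Pos + 1; J < NumValid; ++J)
    Vals[J - 1] = Vals[J];
  --NumValid;
}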
@@ -423,18 +473,18 @@ static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
const MachineRegisterInfo *MRI) {
assert(!PDiff.begin()->isValid() && "stale PDiff");
- for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i)
- PDiff.addPressureChange(RegOpers.Defs[i], true, MRI);
+ for (unsigned Reg : RegOpers.Defs)
+ PDiff.addPressureChange(Reg, true, MRI);
- for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i)
- PDiff.addPressureChange(RegOpers.Uses[i], false, MRI);
+ for (unsigned Reg : RegOpers.Uses)
+ PDiff.addPressureChange(Reg, false, MRI);
}
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- if (LiveRegs.insert(Regs[i]))
- increaseRegPressure(Regs[i]);
+ for (unsigned Reg : Regs) {
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
}
}
@@ -465,13 +515,9 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
/// registers that are both defined and used by the instruction. If a pressure
/// difference pointer is provided, record the changes in pressure caused by this
/// instruction independent of liveness.
-bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
+void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
PressureDiff *PDiff) {
- // Check for the top of the analyzable region.
- if (CurrPos == MBB->begin()) {
- closeRegion();
- return false;
- }
+ assert(CurrPos != MBB->begin());
if (!isBottomClosed())
closeBottom();
@@ -483,11 +529,8 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
do
--CurrPos;
while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+ assert(!CurrPos->isDebugValue());
- if (CurrPos->isDebugValue()) {
- closeRegion();
- return false;
- }
SlotIndex SlotIdx;
if (RequireIntervals)
SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
@@ -496,8 +539,11 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
if (RequireIntervals && isTopClosed())
static_cast<IntervalPressure&>(P).openTop(SlotIdx);
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(CurrPos, RegOpers);
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(MI, *LIS);
if (PDiff)
collectPDiff(*PDiff, RegOpers, MRI);
@@ -508,37 +554,19 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
- bool DeadDef = false;
- if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
- if (LR) {
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- DeadDef = LRQ.isDeadDef();
- }
- }
- if (DeadDef) {
- // LiveIntervals knows this is a dead even though it's MachineOperand is
- // not flagged as such. Since this register will not be recorded as
- // live-out, increase its PDiff value to avoid underflowing pressure.
- if (PDiff)
- PDiff->addPressureChange(Reg, false, MRI);
- } else {
- if (LiveRegs.erase(Reg))
- decreaseRegPressure(Reg);
- else
- discoverLiveOut(Reg);
- }
+ for (unsigned Reg : RegOpers.Defs) {
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
+ else
+ discoverLiveOut(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg)) {
// Adjust liveouts if LiveIntervals are available.
if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
if (LR) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
if (!LRQ.isKill() && !LRQ.valueDefined())
@@ -552,24 +580,18 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
}
}
if (TrackUntiedDefs) {
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
+ for (unsigned Reg : RegOpers.Defs) {
if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg))
UntiedDefs.insert(Reg);
}
}
- return true;
}
/// Advance across the current instruction.
-bool RegPressureTracker::advance() {
+void RegPressureTracker::advance() {
assert(!TrackUntiedDefs && "unsupported mode");
- // Check for the bottom of the analyzable region.
- if (CurrPos == MBB->end()) {
- closeRegion();
- return false;
- }
+ assert(CurrPos != MBB->end());
if (!isTopClosed())
closeTop();
@@ -585,11 +607,10 @@ bool RegPressureTracker::advance() {
static_cast<RegionPressure&>(P).openBottom(CurrPos);
}
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(CurrPos, RegOpers);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*CurrPos, *TRI, *MRI);
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
// Discover live-ins.
bool isLive = LiveRegs.contains(Reg);
if (!isLive)
@@ -597,24 +618,21 @@ bool RegPressureTracker::advance() {
// Kill liveness at last uses.
bool lastUse = false;
if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
lastUse = LR && LR->Query(SlotIdx).isKill();
- }
- else {
+ } else {
// Allocatable physregs are always single-use before register rewriting.
lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
}
if (lastUse && isLive) {
LiveRegs.erase(Reg);
decreaseRegPressure(Reg);
- }
- else if (!lastUse && !isLive)
+ } else if (!lastUse && !isLive)
increaseRegPressure(Reg);
}
// Generate liveness for defs.
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
+ for (unsigned Reg : RegOpers.Defs) {
if (LiveRegs.insert(Reg))
increaseRegPressure(Reg);
}
@@ -627,7 +645,6 @@ bool RegPressureTracker::advance() {
do
++CurrPos;
while (CurrPos != MBB->end() && CurrPos->isDebugValue());
- return true;
}
/// Find the max change in excess pressure across all sets.
@@ -653,8 +670,7 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
PDiff = 0; // Under the limit
else
PDiff = PNew - Limit; // Just exceeded limit.
- }
- else if (Limit > PNew)
+ } else if (Limit > PNew)
PDiff = Limit - POld; // Just obeyed limit.
if (PDiff) {
@@ -719,34 +735,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
// Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true);
- collectOperands(MI, RegOpers);
-
- // Boost max pressure for all dead defs together.
- // Since CurrSetPressure and MaxSetPressure
- increaseRegPressure(RegOpers.DeadDefs);
- decreaseRegPressure(RegOpers.DeadDefs);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true);
+ assert(RegOpers.DeadDefs.size() == 0);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(*MI, *LIS);
// Kill liveness at live defs.
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
- bool DeadDef = false;
- if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
- if (LR) {
- SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- DeadDef = LRQ.isDeadDef();
- }
- }
- if (!DeadDef) {
- if (!containsReg(RegOpers.Uses, Reg))
- decreaseRegPressure(Reg);
- }
+ for (unsigned Reg : RegOpers.Defs) {
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg))
increaseRegPressure(Reg);
}
@@ -853,7 +854,8 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
unsigned MNew = MOld;
// Ignore DeadDefs here because they aren't captured by PressureChange.
unsigned PNew = POld + PDiffI->getUnitInc();
- assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow");
+ assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld)
+ && "PSet overflow/underflow");
if (PNew > MOld)
MNew = PNew;
// Check if current pressure has exceeded the limit.
@@ -892,19 +894,13 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
}
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
-static bool findUseBetween(unsigned Reg,
- SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
- const MachineRegisterInfo *MRI,
+static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx,
+ SlotIndex NextUseIdx, const MachineRegisterInfo &MRI,
const LiveIntervals *LIS) {
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI->use_instr_nodbg_begin(Reg),
- UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) {
- const MachineInstr* MI = &*UI;
- if (MI->isDebugValue())
- continue;
- SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
- if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
- return true;
+ for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) {
+ SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
}
return false;
}
@@ -919,8 +915,8 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
// Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(MI, RegOpers);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
@@ -928,21 +924,18 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
if (RequireIntervals)
SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (RequireIntervals) {
// FIXME: allow the caller to pass in the list of vreg uses that remain
// to be bottom-scheduled to avoid searching uses at each query.
SlotIndex CurrIdx = getCurrSlot();
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
if (LR) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
- if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS))
decreaseRegPressure(Reg);
- }
}
- }
- else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Allocatable physregs are always single-use before register rewriting.
decreaseRegPressure(Reg);
}
@@ -966,7 +959,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
/// This is expensive for an on-the-fly query because it calls
/// bumpDownwardPressure to recompute the pressure sets based on current
/// liveness. We don't yet have a fast version of downward pressure tracking
-/// analagous to getUpwardPressureDelta.
+/// analogous to getUpwardPressureDelta.
void RegPressureTracker::
getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
ArrayRef<PressureChange> CriticalPSets,
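Taken together, the RegisterPressure.cpp hunks route operand collection through RegisterOperands::collect() and detectDeadDefs(). A hedged sketch of the call sequence as it appears in recede() above; RegisterOperands sits in an anonymous namespace in this file, so the fragment only makes sense there, with TRI, MRI, LIS and RequireIntervals being the tracker's members:

const MachineInstr &MI = *CurrPos;
RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, *MRI, /*IgnoreDead=*/false);
if (RequireIntervals)              // move liveness-dead defs out of Defs
  RegOpers.detectDeadDefs(MI, *LIS);
for (unsigned Reg : RegOpers.Defs) {
  // Defs now holds only defs that are live past MI.
}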
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 4176686..8fa1bf7 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -31,9 +31,12 @@ using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
/// setUsed - Set the register units of this register as used.
-void RegScavenger::setRegUsed(unsigned Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- RegUnitsAvailable.reset(*RUI);
+void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
+ for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ LaneBitmask UnitMask = (*RUI).second;
+ if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
+ RegUnitsAvailable.reset((*RUI).first);
+ }
}
void RegScavenger::initRegState() {
@@ -50,9 +53,8 @@ void RegScavenger::initRegState() {
return;
// Live-in registers are in use.
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- setRegUsed(*I);
+ for (const auto &LI : MBB->liveins())
+ setRegUsed(LI.PhysReg, LI.LaneMask);
// Pristine CSRs are also unavailable.
const MachineFunction &MF = *MBB->getParent();
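The scavenger change above makes setRegUsed lane-aware: a register unit is reserved only if its unit mask overlaps the requested LaneMask, and a unit mask of 0 means the unit is not tied to a particular lane. A standalone restatement of that filter, assuming the MCRegUnitMaskIterator API shown in the hunk; reserveUnits() is a hypothetical free function:

#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Mark the register units of Reg whose lanes overlap LaneMask as unavailable.
static void reserveUnits(BitVector &RegUnitsAvailable, unsigned Reg,
                         LaneBitmask LaneMask, const TargetRegisterInfo *TRI) {
  for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
    LaneBitmask UnitMask = (*RUI).second; // lanes covered by this unit
    if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
      RegUnitsAvailable.reset((*RUI).first);
  }
}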
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 76a7fef..efde61e 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -372,7 +372,6 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << "\n";
}
}
- dbgs() << "\n";
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 390b6d2..fb82ab7 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -51,15 +51,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- bool IsPostRAFlag, bool RemoveKillFlags,
- LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
- IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
- CanHandleTerminators(false), FirstDbgValue(nullptr) {
- assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+ bool RemoveKillFlags)
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+ RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
+ TrackLaneMasks(false), FirstDbgValue(nullptr) {
DbgValues.clear();
- assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
- "Virtual registers must be removed prior to PostRA scheduling");
const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -230,11 +226,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (TRI->isPhysicalRegister(Reg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- else {
- assert(!IsPostRA && "Virtual register encountered after regalloc.");
- if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(&ExitSU, i);
- }
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
@@ -242,11 +235,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- if (!Uses.contains(Reg))
- Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ for (const auto &LI : (*SI)->liveins()) {
+ if (!Uses.contains(LI.PhysReg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
}
}
}
@@ -371,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
}
+LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
+{
+ unsigned Reg = MO.getReg();
+ // No point in tracking lanemasks if we don't have interesting subregisters.
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+ if (!RC.HasDisjunctSubRegs)
+ return ~0u;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0)
+ return RC.getLaneMask();
+ return TRI->getSubRegIndexLaneMask(SubReg);
+}
+
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
@@ -378,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
- const MachineInstr *MI = SU->getInstr();
- unsigned Reg = MI->getOperand(OperIdx).getReg();
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ LaneBitmask DefLaneMask;
+ LaneBitmask KillLaneMask;
+ if (TrackLaneMasks) {
+ bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
+ DefLaneMask = getLaneMaskForMO(MO);
+ // If we have a <read-undef> flag, none of the lane values comes from an
+ // earlier instruction.
+ KillLaneMask = IsKill ? ~0u : DefLaneMask;
+
+ // Clear undef flag, we'll re-add it later once we know which subregister
+ // Def is first.
+ MO.setIsUndef(false);
+ } else {
+ DefLaneMask = ~0u;
+ KillLaneMask = ~0u;
+ }
+
+ if (MO.isDead()) {
+ assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
+ "Dead defs should have no uses");
+ } else {
+ // Add data dependence to all uses we found so far.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
+ E = CurrentVRegUses.end(); I != E; /*empty*/) {
+ LaneBitmask LaneMask = I->LaneMask;
+ // Ignore uses of other lanes.
+ if ((LaneMask & KillLaneMask) == 0) {
+ ++I;
+ continue;
+ }
+
+ if ((LaneMask & DefLaneMask) != 0) {
+ SUnit *UseSU = I->SU;
+ MachineInstr *Use = UseSU->getInstr();
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
+ I->OperandIndex));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+
+ LaneMask &= ~KillLaneMask;
+ // If we found a Def for all lanes of this use, remove it from the list.
+ if (LaneMask != 0) {
+ I->LaneMask = LaneMask;
+ ++I;
+ } else
+ I = CurrentVRegUses.erase(I);
+ }
+ }
- // Singly defined vregs do not have output/anti dependencies.
- // The current operand is a def, so we have at least one.
- // Check here if there are any others...
+ // Shortcut: Singly defined vregs do not have output/anti dependencies.
if (MRI.hasOneDef(Reg))
return;
- // Add output dependence to the next nearest def of this vreg.
+ // Add output dependence to the next nearest defs of this vreg.
//
// Unless this definition is dead, the output dependence should be
// transitively redundant with antidependencies from this definition's
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
- if (DefI == VRegDefs.end())
- VRegDefs.insert(VReg2SUnit(Reg, SU));
- else {
- SUnit *DefSU = DefI->SU;
- if (DefSU != SU && DefSU != &ExitSU) {
- SDep Dep(SU, SDep::Output, Reg);
- Dep.setLatency(
- SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
- DefSU->addPred(Dep);
- }
- DefI->SU = SU;
+ LaneBitmask LaneMask = DefLaneMask;
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for other lanes.
+ if ((V2SU.LaneMask & LaneMask) == 0)
+ continue;
+ // Add an output dependence.
+ SUnit *DefSU = V2SU.SU;
+ // Ignore additional defs of the same lanes in one instruction. This can
+ // happen because lanemasks are shared for targets with too many
+ // subregisters. We also use some representation tricks/hacks where we
+ // add super-register defs/uses, to imply that although we only access parts
+ // of the reg we care about the full one.
+ if (DefSU == SU)
+ continue;
+ SDep Dep(SU, SDep::Output, Reg);
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ DefSU->addPred(Dep);
+
+ // Update the current definition. This can get tricky if the def previously
+ // covered a bigger lanemask: we then have to shrink it and create a new
+ // VReg2SUnit for the non-overlapping part.
+ LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
+ LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
+ if (NonOverlapMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
+ V2SU.SU = SU;
+ V2SU.LaneMask = OverlapMask;
}
+ // If there was no CurrentVRegDefs entry for some lanes yet, create one.
+ if (LaneMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
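The OverlapMask/NonOverlapMask split above is easiest to see with concrete masks. The following toy fragment uses plain integers rather than LLVM types and only illustrates what happens when a new def rewrites part of an existing def's lanes:

#include <cstdint>

// Old def covered lanes {0,1}; the new def rewrites lane 0 only.
uint32_t PrevDefMask = 0x3;
uint32_t NewDefMask  = 0x1;
uint32_t Overlap    = PrevDefMask & NewDefMask;  // 0x1: re-pointed at the new SUnit
uint32_t NonOverlap = PrevDefMask & ~NewDefMask; // 0x2: kept for the previous SUnit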
/// addVRegUseDeps - Add a register data dependency if the instruction that
@@ -416,59 +492,34 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
- MachineInstr *MI = SU->getInstr();
- unsigned Reg = MI->getOperand(OperIdx).getReg();
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // Remember the use. Data dependencies will be added when we find the def.
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
+ CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
+
+ // Add antidependences to the following defs of the vreg.
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for unrelated lanes.
+ LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
+ if ((PrevDefLaneMask & LaneMask) == 0)
+ continue;
+ if (V2SU.SU == SU)
+ continue;
- // Record this local VReg use.
- VReg2UseMap::iterator UI = VRegUses.find(Reg);
- for (; UI != VRegUses.end(); ++UI) {
- if (UI->SU == SU)
- break;
+ V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
}
- if (UI == VRegUses.end())
- VRegUses.insert(VReg2SUnit(Reg, SU));
-
- // Lookup this operand's reaching definition.
- assert(LIS && "vreg dependencies requires LiveIntervals");
- LiveQueryResult LRQ
- = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
- VNInfo *VNI = LRQ.valueIn();
-
- // VNI will be valid because MachineOperand::readsReg() is checked by caller.
- assert(VNI && "No value to read by operand");
- MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
- // Phis and other noninstructions (after coalescing) have a NULL Def.
- if (Def) {
- SUnit *DefSU = getSUnit(Def);
- if (DefSU) {
- // The reaching Def lives within this scheduling region.
- // Create a data dependence.
- SDep dep(DefSU, SDep::Data, Reg);
- // Adjust the dependence latency using operand def/use information, then
- // allow the target to perform its own adjustments.
- int DefOp = Def->findRegisterDefOperandIdx(Reg);
- dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
-
- const TargetSubtargetInfo &ST = MF.getSubtarget();
- ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
- SU->addPred(dep);
- }
- }
-
- // Add antidependence to the following def of the vreg it uses.
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
- if (DefI != VRegDefs.end() && DefI->SU != SU)
- DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
- if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
- return true;
- return false;
+ return MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)));
}
// This MI might have either incomplete info, or known to be unsafe
@@ -508,7 +559,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
return false;
}
-/// This returns true if the two MIs need a chain edge betwee them.
+/// This returns true if the two MIs need a chain edge between them.
/// If these are not even memory operations, we still may need
/// chain deps between them. The question really is - could
/// these two MIs be reordered during scheduling from memory dependency
@@ -670,7 +721,7 @@ static inline void addChainDependency(AliasAnalysis *AA,
unsigned TrueMemOrderLatency = 0,
bool isNormalMemory = false) {
// If this is a false dependency,
- // do not add the edge, but rememeber the rejected node.
+ // do not add the edge, but remember the rejected node.
if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
@@ -685,7 +736,7 @@ static inline void addChainDependency(AliasAnalysis *AA,
}
}
-/// Create an SUnit for each real instruction, numbered in top-down toplological
+/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B, implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
@@ -743,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
+void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, 0, SU));
+ }
+}
+
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
- PressureDiffs *PDiffs) {
+ PressureDiffs *PDiffs,
+ bool TrackLaneMasks) {
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+ this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
ScheduleDAG::clearDAG();
@@ -766,7 +844,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// We build scheduling units by walking a block's instruction list from bottom
// to top.
- // Remember where a generic side-effecting instruction is as we procede.
+ // Remember where a generic side-effecting instruction is as we proceed.
SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
// Memory references to specific known memory locations are tracked
@@ -787,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.setUniverse(TRI->getNumRegs());
Uses.setUniverse(TRI->getNumRegs());
- assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
+ assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ CurrentVRegDefs.setUniverse(NumVirtRegs);
+ CurrentVRegUses.setUniverse(NumVirtRegs);
+
VRegUses.clear();
- VRegDefs.setUniverse(MRI.getNumVirtRegs());
- VRegUses.setUniverse(MRI.getNumVirtRegs());
+ VRegUses.setUniverse(NumVirtRegs);
// Model data dependencies between instructions being scheduled and the
// ExitSU.
@@ -818,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
assert(RPTracker->getPos() == std::prev(MII) &&
"RPTracker can't find MI");
+ collectVRegUses(SU);
}
assert(
@@ -835,7 +918,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (TRI->isPhysicalRegister(Reg))
addPhysRegDeps(SU, j);
else {
- assert(!IsPostRA && "Virtual register encountered!");
if (MO.isDef()) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
@@ -890,7 +972,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
@@ -903,27 +985,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes, ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
@@ -937,7 +1019,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPred(SDep(SU, SDep::Barrier));
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
if (Objs.empty()) {
// Treat all other stores conservatively.
@@ -961,7 +1043,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, 0, true);
// If we're not using AA, then we only need one store per object.
@@ -986,7 +1068,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
@@ -996,15 +1078,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
}
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
@@ -1012,7 +1094,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
if (Objs.empty()) {
// A load with no underlying object. Depend on all
@@ -1020,7 +1102,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes);
PendingLoads.push_back(SU);
@@ -1044,7 +1126,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
@@ -1052,11 +1134,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
@@ -1068,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.clear();
Uses.clear();
- VRegDefs.clear();
+ CurrentVRegDefs.clear();
+ CurrentVRegUses.clear();
PendingLoads.clear();
}
@@ -1080,11 +1163,9 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI) {
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
+ for (const auto &LI : (*SI)->liveins()) {
// Repeat, for reg and all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
LiveRegs.set(*SubRegs);
}
@@ -1103,7 +1184,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// Once we set a kill flag on an instruction, we bail out, as otherwise we
// might set it on too many operands. We will clear as many flags as we
// can though.
- MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator Begin = MI->getIterator();
MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
while (Begin != End) {
for (MachineOperand &MO : (--End)->operands()) {
@@ -1237,7 +1318,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
toggleKillFlag(MI, MO);
DEBUG(MI->dump());
DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator Begin = MI->getIterator();
MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
while (++Begin != End)
DEBUG(Begin->dump());
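The lane-mask plumbing added to ScheduleDAGInstrs hinges on getLaneMaskForMO above: a full mask when the register class has no disjunct subregisters or the operand carries no subregister index, otherwise the lanes of that index. A hedged free-function restatement (laneMaskForOperand() is illustrative, not part of the patch), using only the queries visible in the hunk:

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Lanes accessed by a virtual-register operand; ~0u means "all lanes".
static LaneBitmask laneMaskForOperand(const MachineOperand &MO,
                                      const MachineRegisterInfo &MRI,
                                      const TargetRegisterInfo &TRI) {
  const TargetRegisterClass &RC = *MRI.getRegClass(MO.getReg());
  if (!RC.HasDisjunctSubRegs)
    return ~0u;                      // no interesting subregisters
  unsigned SubReg = MO.getSubReg();
  return SubReg ? TRI.getSubRegIndexLaneMask(SubReg) : RC.getLaneMask();
}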
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index b2e4617..1150d26 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -43,9 +43,12 @@ namespace llvm {
return (Node->NumPreds > 10 || Node->NumSuccs > 10);
}
- static bool hasNodeAddressLabel(const SUnit *Node,
- const ScheduleDAG *Graph) {
- return true;
+ static std::string getNodeIdentifierLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+ OS << static_cast<const void *>(Node);
+ return R;
}
/// If you want to override the dot attributes printed for a particular
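The printer change above replaces the boolean hasNodeAddressLabel hook with getNodeIdentifierLabel, which returns the node's address formatted as a string. A small standalone sketch of producing such a label with raw_string_ostream (pointerLabel() is only illustrative):

#include "llvm/Support/raw_ostream.h"
#include <string>

// Format a pointer value as a string for use as a DOT identifier label.
static std::string pointerLabel(const void *P) {
  std::string R;
  llvm::raw_string_ostream OS(R);
  OS << P;
  return OS.str(); // str() flushes the stream into R and returns it
}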
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b29306..0872d7a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -156,13 +156,16 @@ namespace {
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
@@ -233,18 +236,17 @@ namespace {
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
- SDValue visitSREM(SDNode *N);
- SDValue visitUREM(SDNode *N);
+ SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
- SDValue visitSDIVREM(SDNode *N);
- SDValue visitUDIVREM(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
@@ -265,6 +267,7 @@ namespace {
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCE(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
@@ -298,6 +301,10 @@ namespace {
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
@@ -312,9 +319,11 @@ namespace {
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -338,14 +347,17 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildReciprocalEstimate(SDValue Op);
- SDValue BuildRsqrtEstimate(SDValue Op);
- SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
- SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -374,6 +386,10 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Do FindBetterChain for a store and any possibly adjacent stores on
+ /// consecutive chains.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -388,19 +404,37 @@ namespace {
unsigned SequenceNum;
};
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
/// constant build_vector of the stored constant values in Stores.
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended. LoadedVT returns the type
+ /// of the original loaded value. NarrowLoad returns whether the load would
+ /// need to be narrowed in order to match.
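+  /// For example, with c == 255 and an i32 load, ExtVT is i8 and the load
+  /// would need to be narrowed to match (NarrowLoad == true).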
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumElem,
+ EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
@@ -409,7 +443,7 @@ namespace {
void getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -427,9 +461,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- auto *F = DAG.getMachineFunction().getFunction();
- ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
}
/// Runs the dag combiner on all nodes in the work list
@@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
@@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
- Op.getOperand(0));
+ Op.getOperand(0), Flags);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
@@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0));
+ Op.getOperand(1), Op.getOperand(0), Flags);
case ISD::FMUL:
case ISD::FDIV:
@@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, Depth+1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
@@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I)
- AddToWorklist(I);
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
- case ISD::SREM: return visitSREM(N);
- case ISD::UREM: return visitUREM(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
- case ISD::SDIVREM: return visitSDIVREM(N);
- case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
@@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCE: return visitSETCCE(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
@@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
}
return SDValue();
}
@@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
- SDNode *CSENode;
- if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
- &BinNode->Flags);
- } else {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
- }
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static bool isNullConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool isNullFPConstant(SDValue V) {
- ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
- return Const != nullptr && Const->isZero() && !Const->isNegative();
-}
-
-static bool isAllOnesConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
-}
-
-static bool isOneConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isOne();
-}
-
/// If \p N is a ConstantSDNode with isOpaque() == false return it cast to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
@@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
- if (VT.isInteger() && !VT.isVector()) {
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
-
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
-
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
- if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
- }
- }
- }
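+  // e.g. (add (shl x, 8), (and y, 255)) -> (or (shl x, 8), (and y, 255))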
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1) {
- SDLoc DL(N);
+ if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::CARRY_FALSE, DL,
- MVT::Glue));
- }
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
- return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
- return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
@@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ if (isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
// reassociate mul
if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
@@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return SDValue();
}
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
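+/// For example, if both (sdiv x, y) and (srem x, y) appear in the DAG, they
+/// can be replaced with the two results of a single (sdivrem x, y) node.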
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ EVT VT = Node->getValueType(0);
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue()) {
- SDLoc DL(N);
+ if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
+
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
- N0, N1);
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
@@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
!cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
- // If dividing by powers of two is cheap, then don't perform the following
- // fold.
- if (TLI.isPow2SDivCheap())
- return SDValue();
-
// Target-specific implementation of sdiv x, pow2.
- SDValue Res = BuildSDIVPow2(N);
- if (Res.getNode())
+ if (SDValue Res = BuildSDIVPow2(N))
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
- SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
@@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// If integer divide is expensive and we satisfy the requirements, emit an
- // alternate sequence.
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildSDIV(N);
- if (Op.getNode()) return Op;
- }
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+  // sdiv, srem -> sdivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap()
+  // is true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, DL, VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDLoc DL(N);
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
@@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
}
}
+
// fold (udiv x, c) -> alternate
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildUDIV(N);
- if (Op.getNode()) return Op;
- }
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+  // udiv, urem -> udivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap()
+  // is true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSREM(SDNode *N) {
+// Handles ISD::SREM and ISD::UREM.
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
- // fold (srem c1, c2) -> c1%c2
+ // fold (rem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
- N0C, N1C))
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
- // If we know the sign bits of both operands are zero, strength reduce to a
- // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
- }
- // If X/C can be simplified by the division-by-constant logic, lower
- // X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- return Sub;
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
}
- }
-
- // undef % X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
- // X % undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
- return N1;
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitUREM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- // fold (urem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
- N0C, N1C))
- return Folded;
- // fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
- }
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1,
+ } else {
+ // fold (urem x, pow2) -> (and x, pow2-1)
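+    // e.g. (urem x, 8) -> (and x, 7)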
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
}
}
}
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
+  // To avoid mangling nodes, this simplification requires that the combine()
+  // call for the speculative DIV not cause a DIVREM conversion. We guard
+  // against this by skipping the simplification if isIntDivCheap(). When
+  // div is not cheap, combine will not return a DIVREM. Regardless,
+  // checking cheapness here makes sense since the simplification results in
+  // fatter code.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
+ assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
+ (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
+  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
- if (Res.getNode()) return Res;
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
- return SDValue();
-}
+  // fold (minmax c1, c2) -> c1 minmax c2
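+  // e.g. (smin 3, 5) -> 3; (umax 3, 5) -> 5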
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
-SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
- if (Res.getNode()) return Res;
+ // canonicalize constant to RHS
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
@@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
return SDValue();
}
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad) {
+ uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
+
+ if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ return false;
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ NarrowLoad = false;
+ return true;
+ }
+
+ // Do not change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ NarrowLoad = true;
+ return true;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
: cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT LoadedVT = LN0->getMemoryVT();
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
-
+      bool NarrowLoad = false;
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad)) {
+ if (!NarrowLoad) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
@@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
-
- // Do not change the width of a volatile load.
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
+ } else {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
@@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
- SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
- BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
@@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
- if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
@@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
}
// Return true if we can prove that, whenever Neg and Pos are both in the
-// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
-// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
- // If OpSize is a power of 2 then:
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+ // If EltSize is a power of 2 then:
//
- // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
- // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
- // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
- // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
- // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
- // Neg == OpSize - Pos [B]
+ // Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
- // behavior if Pos == 0 (and consequently Neg == OpSize).
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
//
- // We could actually use [A] whenever OpSize is a power of 2, but the
+ // We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
- // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
- // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND &&
- isPowerOf2_64(OpSize) &&
- Neg.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
- Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(OpSize);
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ if (NegC->getAPIntValue() == EltSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(EltSize);
+ }
+ }
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
- return 0;
- ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
- return 0;
+ return false;
SDValue NegOp1 = Neg.getOperand(1);
- // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits &&
- Pos.getOpcode() == ISD::AND &&
- Pos.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
- Pos = Pos.getOperand(0);
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND)
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ if (PosC->getAPIntValue() == EltSize - 1)
+ Pos = Pos.getOperand(0);
// The condition we need is now:
//
- // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
- // OpSize & Mask == NegC & Mask
+ // EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
APInt Width;
if (Pos == NegOp1)
Width = NegC->getAPIntValue();
+
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
- // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
- // NegC & Mask == (OpSize - PosC) & Mask
- // OpSize & Mask == (NegC + PosC) & Mask
- else if (Pos.getOpcode() == ISD::ADD &&
- Pos.getOperand(0) == NegOp1 &&
- Pos.getOperand(1).getOpcode() == ISD::Constant)
- Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
- NegC->getAPIntValue());
- else
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
return false;
- // Now we just need to check that OpSize & Mask == Width & Mask.
+ // Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
- // Opsize & Mask is 0 since Mask is Opsize - 1.
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
- return Width == OpSize;
+ return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
@@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
@@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
- std::swap(LHSMask , RHSMask );
+ std::swap(LHSMask, RHSMask);
}
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
@@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (LHSShiftAmt.getOpcode() == ISD::Constant &&
- RHSShiftAmt.getOpcode() == ISD::Constant) {
- uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != OpSizeInBits)
+ if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
+ uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != EltSizeInBits)
return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
@@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+ APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+ SDValue Mask = DAG.getConstant(AllBits, DL, VT);
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
- Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask,
+ DAG.getConstant(RHSBits, DL, VT)));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
- Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask,
+ DAG.getConstant(LHSBits, DL, VT)));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
return Rot.getNode();
@@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// Simplify the expression using non-local knowledge.
if (!VT.isVector() &&
@@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSHL = visitShiftByConstant(N, N1C);
- if (NewSHL.getNode())
- return NewSHL;
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
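+  // e.g. (shl (mul x, 3), 2) -> (mul x, 12)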
+ if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (SDValue Folded =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
+ }
}
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ return NewSHL;
+
return SDValue();
}
@@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRA = visitShiftByConstant(N, N1C);
- if (NewSRA.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N, N1C))
return NewSRA;
- }
return SDValue();
}
@@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
@@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRL = visitShiftByConstant(N, N1C);
- if (NewSRL.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N, N1C))
return NewSRL;
- }
// Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
@@ -4973,70 +5087,47 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // fold selects based on a setcc into other things, such as min/max/abs
- if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath &&
- VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-
- SDValue FMinMax =
- combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
- N1, N2, CC, TLI, DAG);
- if (FMinMax)
- return FMinMax;
- }
-
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- N1, N2, N0.getOperand(2));
- return SimplifySelect(SDLoc(N), N0, N1, N2);
- }
-
if (VT0 == MVT::i1) {
- if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
- // select (and Cond0, Cond1), X, Y
- // -> select Cond0, (select Cond1, X, Y), Y
- if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+    // The code in this block deals with the following two equivalences:
+    // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+    // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+    // The target can specify its preferred form with the
+    // shouldNormalizeToSelectSequence() callback. However, we always transform
+    // to the right-hand form if the inner select already exists in the DAG,
+    // and we always transform to the left-hand form if we know that we can
+    // further optimize the combination of the conditions.
+    bool normalizeToSequence =
+        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
InnerSelect, N2);
- }
- // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
- if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
InnerSelect);
- }
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
- if (N1->getOpcode() == ISD::SELECT) {
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
N0, N1_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
@@ -5049,13 +5140,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
- if (N2->getOpcode() == ISD::SELECT) {
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
N0, N2_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
@@ -5069,6 +5160,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // select x, y (fcmp lt x, y) -> fminnum x, y
+ // select x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, CC,
+ TLI, DAG))
+ return FMinMax;
+ }
+
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(SDLoc(N), N0, N1, N2);
+ }
+
return SDValue();
}
@@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
N2.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SDValue CV = ConvertSelectToConcatVector(N, DAG);
- if (CV.getNode())
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
return CV;
}
@@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDLoc(N));
}
-/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+SDValue DAGCombiner::visitSETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (Carry.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
@@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (!VT.isVector()) {
EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
@@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (truncate x)) -> (and x, mask)
- if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
-
+ if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
- SDValue Op = N0.getOperand(0);
- if (Op.getValueType().bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (Op.getValueType().bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+      // Try to mask before the extension to avoid having to generate a larger
+      // mask, possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT)) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = N0.getOperand(0);
+ if (SrcVT.bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ } else if (SrcVT.bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
- return DAG.getZeroExtendInReg(Op, SDLoc(N),
- N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
- SetCCs, TLI);
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+          bool NarrowLoad = false;
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad))
+ DoXform = false;
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ }
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
@@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
// Watch out for shift count overflow though.
if (Amt >= Mask.getBitWidth()) break;
APInt NewMask = Mask << Amt;
- SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
- if (SimplifyLHS.getNode())
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
@@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (sext_in_reg c1) -> c1
- if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
@@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
BSwap, N1);
}
- // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
- // into a build_vector.
- if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
- unsigned ShAmt = VTBits - EVTBits;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->getOpcode() == ISD::UNDEF) {
- Elts.push_back(Op);
- continue;
- }
-
- ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SDLoc(Op), Op.getValueType()));
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
- }
-
return SDValue();
}
@@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- SDValue Reduced = ReduceLoadWidth(N);
- if (Reduced.getNode())
+ if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
+
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
return SDValue();
}
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
@@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
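Illustrative aside (not part of the patch): a host-side sketch of the sign-bit arithmetic the ppc_fp128 block above builds in the DAG, using an ordinary double in place of the Hi half of a ppc_fp128. XOR-ing the sign bit negates the value; deriving the flip bit from the current sign yields fabs.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint64_t SignBit = 1ULL << 63;
  double hi = 3.5;                        // stands in for the Hi double
  uint64_t bits;
  std::memcpy(&bits, &hi, sizeof(bits));
  // fneg: FlipBit is the sign bit itself.
  uint64_t negBits = bits ^ SignBit;
  double neg;
  std::memcpy(&neg, &negBits, sizeof(neg));
  assert(neg == -3.5);
  // fabs: FlipBit is (Hi & SignBit), so only negative inputs get flipped.
  double nhi = -2.25;
  std::memcpy(&bits, &nhi, sizeof(bits));
  uint64_t flip = bits & SignBit;
  uint64_t absBits = bits ^ flip;
  double absVal;
  std::memcpy(&absVal, &absBits, sizeof(absVal));
  assert(absVal == 2.25);
  return 0;
}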
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
@@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
@@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(X.getNode());
}
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
+ N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
+ N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
@@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
- if (N0.getOpcode() == ISD::BUILD_PAIR) {
- SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
- if (CombineLD.getNode())
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
- }
// Remove double bitcasts from shuffles - this is often a legacy of
// XformToShuffleWithZero being used to combine bitmaskings (of
@@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
// If operands are a bitcast, peek through if it casts the original VT.
- // If operands are a UNDEF or constant, just bitcast back to original VT.
+ // If operands are a constant, just bitcast back to original VT.
auto PeekThroughBitcast = [&](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST &&
- Op.getOperand(0)->getValueType(0) == VT)
+ Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
@@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && N0.getOpcode() == ISD::FMUL &&
+ N1.getOpcode() == ISD::FMUL) {
+ if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ std::swap(N0, N1);
+ }
+
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
@@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
@@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
@@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
@@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y, (fma u, v, (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
@@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
+/// Try to perform FMA combining on a given FMUL node.
+SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
+ // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
+ // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
+ // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
+ if (XC0 && XC0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y);
+ if (XC0 && XC0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0))
+ return FMA;
+
+ return SDValue();
+}
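Illustrative aside (not part of the patch): a quick numeric check of the algebra behind FuseFADD/FuseFSUB, with plain float and std::fma standing in for the ISD::FMA node. The identities are exact in real arithmetic; in the DAG the transform is still gated on the fusion rules above.

#include <cassert>
#include <cmath>

int main() {
  float x = 2.0f, y = 3.0f;
  // (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  assert((x + 1.0f) * y == std::fma(x, y, y));
  // (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  assert((x - 1.0f) * y == std::fma(x, y, -y));
  // (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  assert((1.0f - x) * y == std::fma(-x, y, y));
  // (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  assert((-1.0f - x) * y == std::fma(-x, y, -y));
  return 0;
}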
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations));
+ GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+ Flags),
+ Flags);
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT));
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT));
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
@@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
}
}
} // enable-unsafe-fp-math
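Illustrative aside (not part of the patch): a small check of the reassociations applied above. They hold exactly in real arithmetic (and for these sample values), but can change rounding in general, which is why they sit under the unsafe-math guard.

#include <cassert>

int main() {
  double x = 1.5, c = 4.0;
  assert(x * c + x == x * (c + 1.0));          // (fadd (fmul x, c), x)
  assert(x * c + (x + x) == x * (c + 2.0));    // (fadd (fmul x, c), (fadd x, x))
  assert((x + x) + x == x * 3.0);              // (fadd (fadd x, x), x)
  assert((x + x) + (x + x) == x * 4.0);        // (fadd (fadd x, x), (fadd x, x))
  return 0;
}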
// FADD -> FMA combines:
- SDValue Fused = visitFADDForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- SDValue Fused = visitFSUBForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector()) {
@@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// the second operand of the outer multiply are constants.
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
}
@@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
// during an early run of DAGCombiner can prevent folding with fmuls
// inserted during lowering.
- if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::FADD &&
+ (N0.getOperand(0) == N0.getOperand(1)) &&
+ N0.hasOneUse()) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
return SDValue();
}
@@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
+ // TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FMUL &&
- N0 == N2.getOperand(0) &&
- N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
- }
+ // TODO: FMA nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+ if (Options.UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
+ }
- // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FMUL && N1CFP &&
- N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
- N2);
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
+ N2);
+ }
}
// (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
- // (fma x, c, x) -> (fmul x, (c+1))
- if (Options.UnsafeFPMath && N1CFP && N0 == N2)
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT)));
-
- // (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
+ if (Options.UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT)));
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ }
return SDValue();
}
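Illustrative aside (not part of the patch): the same style of check for the FMA constant folds above. The results are exact for these sample values, but the folds drop the FMA's single rounding, hence the unsafe-math guard.

#include <cassert>
#include <cmath>

int main() {
  double x = 2.0, c1 = 3.0, c2 = 5.0;
  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  assert(std::fma(x, c1, x * c2) == x * (c1 + c2));
  // (fma x, c, x) -> (fmul x, (c+1))
  assert(std::fma(x, c1, x) == x * (c1 + 1.0));
  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  assert(std::fma(x, c1, -x) == x * (c1 - 1.0));
  return 0;
}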
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path grows from "one FDIV" to "one FDIV + one
+// FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags *Flags = N->getFlags();
+ if (!UnsafeMath && !Flags->hasAllowReciprocal())
+ return SDValue();
+
+ // Skip if current node is a reciprocal.
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ SDValue N1 = N->getOperand(1);
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+ if (!MinUses || N1->use_size() < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if (Users.size() < MinUses)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
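Illustrative aside (not part of the patch): the source-level effect of combineRepeatedFPDivisors. A power-of-two divisor keeps this example exact; in general the rewrite can perturb the last bit, which is why it requires unsafe-math or the allow-reciprocal fast-math flag.

#include <cassert>

int main() {
  double a = 6.0, b = 9.0, d = 4.0;
  // Before: two divisions sharing the divisor d.
  double q0 = a / d, q1 = b / d;
  // After: one division forms the reciprocal, each use becomes a multiply.
  double recip = 1.0 / d;
  assert(q0 == a * recip && q1 == b * recip);
  return 0;
}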
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
+ DAG.getConstantFP(Recip, DL, VT), Flags);
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
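Illustrative aside (not part of the patch): the arithmetic behind the divide-by-sqrt rewrites above, using an exact 1/sqrt in place of the target's rsqrt estimate (the real BuildRsqrtEstimate result is only approximate, another reason these folds are unsafe-math only).

#include <cassert>
#include <cmath>

int main() {
  double x = 8.0, y = 2.0, z = 4.0;
  double rsqrtZ = 1.0 / std::sqrt(z);
  // x / sqrt(z) -> x * rsqrt(z)
  assert(x / std::sqrt(z) == x * rsqrtZ);
  // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
  assert(x / (y * std::sqrt(z)) == x * (rsqrtZ / y));
  return 0;
}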
@@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
- // Combine multiple FDIVs with the same divisor into multiple FMULs by the
- // reciprocal.
- // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
- // Notice that this is not always beneficial. One reason is different target
- // may have different costs for FDIV and FMUL, so sometimes the cost of two
- // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
- // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
- if (Options.UnsafeFPMath) {
- // Skip if current node is a reciprocal.
- if (N0CFP && N0CFP->isExactlyValue(1.0))
- return SDValue();
-
- // Find all FDIV users of the same divisor.
- // Use a set because duplicates may be present in the user list.
- SetVector<SDNode *> Users;
- for (auto *U : N1->uses())
- if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
- Users.insert(U);
-
- if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // FIXME: This optimization requires some level of fast-math, so the
- // created reciprocal node should at least have the 'allowReciprocal'
- // fast-math-flag set.
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
-
- // Dividend / Divisor -> Dividend * Reciprocal
- for (auto *U : Users) {
- SDValue Dividend = U->getOperand(0);
- if (Dividend != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
- Reciprocal);
- CombineTo(U, NewNode);
- } else if (U != Reciprocal.getNode()) {
- // In the absence of fast-math-flags, this user node is always the
- // same node as Reciprocal, but with FMF they may be different nodes.
- CombineTo(U, Reciprocal);
- }
- }
- return SDValue(N, 0); // N was replaced.
- }
- }
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+ return CombineRepeatedDivisors;
return SDValue();
}
@@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+ &cast<BinaryWithFlagsSDNode>(N)->Flags);
return SDValue();
}
@@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
return SDValue();
+ // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
if (!RV)
return SDValue();
-
+
EVT VT = RV.getValueType();
SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ EVT CCVT = getSetCCResultType(VT);
SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
@@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
ZeroCmp, Zero, RV);
}
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ // Do not optimize out the type conversion of f128 values yet.
+ // For some targets like x86_64, the configuration has been changed
+ // to keep one f128 value in one SSE register, but instruction
+ // selection cannot yet handle FCOPYSIGN on SSE registers.
+ SDValue N1 = N->getOperand(1);
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND) &&
+ (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+}
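Illustrative aside (not part of the patch): why dropping the conversion feeding the sign operand is safe in the non-f128 cases. Widening or rounding a value never changes its sign, so copysign sees the same sign bit either way.

#include <cassert>
#include <cmath>

int main() {
  double x = 4.0;
  float y = -0.5f;
  // copysign(x, fp_extend(y)) keeps exactly the sign of y ...
  double withExtend = std::copysign(x, static_cast<double>(y));
  // ... so taking the sign from y directly gives the same result.
  double direct = std::signbit(y) ? -std::fabs(x) : std::fabs(x);
  assert(withExtend == direct && withExtend == -4.0);
  return 0;
}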
+
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
- if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(0));
@@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
- TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ (TLI.isFPImmLegal(CVal, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N0.getOperand(1)),
+ &cast<BinaryWithFlagsSDNode>(N0)->Flags);
}
}
@@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Op1 = TheXor->getOperand(1);
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
- SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode()) {
+ if (SDValue Tmp = visitXOR(TheXor)) {
if (Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
@@ -9722,8 +10089,8 @@ struct LoadedSlice {
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
- if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
- LS.Inst->getOperand(0).getValueType()))
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
@@ -10625,30 +10992,109 @@ struct BaseIndexOffset {
};
} // namespace
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+ // to the "t2" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
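Illustrative aside (not part of the patch): a worked instance (with made-up constants) of the profitability test above. Folding both multiplies exposes a common A * c3 term that can be CSE'd, and the constant products fold away.

#include <cassert>
#include <cstdint>

int main() {
  int64_t A = 7, c1 = 3, c2 = 5, c3 = 11;
  // Original form: two multiplies of distinct adds, nothing shared.
  int64_t t1 = (A + c1) * c3;
  int64_t t2 = (A + c2) * c3;
  // Folded form: A * c3 is common to both, c1*c3 and c2*c3 are constants.
  int64_t m = A * c3;
  assert(t1 == m + c1 * c3);
  assert(t2 == m + c2 * c3);
  return 0;
}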
+
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
- BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+ Chains.push_back(St->getChain());
+ BuildVector.push_back(St->getValue());
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ unsigned NumStores, bool IsConstantSrc, bool UseVector) {
// Make sure we have something to merge.
- if (NumElem < 2)
+ if (NumStores < 2)
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned LatestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
+ for (unsigned i=0; i < NumStores; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
@@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
LatestNodeUsed = i;
}
+ SmallVector<SDValue, 8> Chains;
+
// The latest Node in the DAG.
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
if (UseVector) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ bool IsVec = MemVT.isVector();
+ unsigned Elts = NumStores;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ // Get the type for the merged vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
} else {
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = St->getValue();
- // All of the operands of a BUILD_VECTOR must have the same type.
+ // All operands of BUILD_VECTOR / CONCAT_VECTORS must have the same type.
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
+ Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
- }
+ StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+ DL, Ty, Ops);
+ }
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = DAG.getDataLayout().isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ Chains.push_back(St->getChain());
+
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
@@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ assert(!Chains.empty());
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
@@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// Replace the last store with the new store
CombineTo(LatestOp, NewStore);
// Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return true;
}
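Illustrative aside (not part of the patch): how the constant-packing loop in MergeStoresOfConstantsOrVecElts assembles the merged value. Elements are OR'd in most-significant-first order, so on a little-endian target the stores are walked in reverse to keep the lowest-addressed element in the low bits. The sketch below assumes a little-endian host and two hypothetical i16 constants.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Two adjacent i16 stores: 0x1111 at offset 0 and 0x2222 at offset 2.
  uint16_t elt[2] = {0x1111, 0x2222};
  // Little-endian packing: iterate the stores in reverse so elt[0] lands in
  // the low bits of the merged i32 constant.
  uint32_t wide = 0;
  for (int i = 1; i >= 0; --i) {
    wide <<= 16;
    wide |= elt[i];
  }
  assert(wide == 0x22221111u);
  // One wide store then writes the same bytes the two narrow stores would
  // have written (this comparison assumes a little-endian host).
  uint8_t merged[4], separate[4];
  std::memcpy(merged, &wide, 4);
  std::memcpy(separate, elt, 4);
  assert(std::memcmp(merged, separate, 4) == 0);
  return 0;
}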
-static bool allowableAlignment(const SelectionDAG &DAG,
- const TargetLowering &TLI, EVT EVTTy,
- unsigned AS, unsigned Align) {
- if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
- return true;
-
- Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
- return (Align >= ABIAlignment);
-}
-
void DAGCombiner::getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
@@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
EVT MemVT = St->getMemoryVT();
unsigned Seq = 0;
StoreSDNode *Index = St;
+
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+
+ if (UseAA) {
+ // Look at other users of the same chain. Stores on the same chain do not
+ // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
+ // to be on the same chain, so don't bother looking at adjacent chains.
+
+ SDValue Chain = St->getChain();
+ for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ if (I.getOperandNo() != 0)
+ continue;
+
+ if (OtherST->isVolatile() || OtherST->isIndexed())
+ continue;
+
+ if (OtherST->getMemoryVT() != MemVT)
+ continue;
+
+ BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr());
+
+ if (Ptr.equalBaseIndex(BasePtr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
+ }
+ }
+
+ return;
+ }
+
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (Index != St && !SDValue(Index, 0)->hasOneUse())
@@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (Index->getMemoryVT() != MemVT)
break;
+ // We do not allow under-aligned stores in order to prevent
+ // overwriting stores. NOTE: this is a bad hack. Alignment SHOULD
+ // be irrelevant here; what MATTERS is that we not move memory
+ // operations that potentially overlap past each other.
+ if (Index->getAlignment() < MemVT.getStoreSize())
+ break;
+
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
return false;
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
+ if (!MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
@@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ // Don't merge vectors into wider vectors if the source data comes from loads.
+ // TODO: This restriction can be lifted by using logic similar to the
+ // ExtractVecSrc case.
+ if (MemVT.isVector() && IsLoadSrc)
return false;
// Only look at ends of store sequences.
@@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// We need to make sure that these nodes do not interfere with
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
+
// Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
-
+
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // Sort the memory operands according to their distance from the base pointer.
+ // Sort the memory operands according to their distance from the
+ // base pointer. As a secondary criterion: make sure stores coming
+ // later in the code come first in the list. This is important for
+ // the non-UseAA case, because we're merging stores into the FINAL
+ // store along a chain which potentially contains aliasing stores.
+ // Thus, if there are multiple stores to the same address, the last
+ // one can be considered for merging but not the others.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase ||
(LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum > RHS.SequenceNum);
+ LHS.SequenceNum < RHS.SequenceNum);
});
// Scan the memory operations on the chain and find the first non-consecutive
@@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
}
- bool Alias = false;
// Check if this store interferes with any of the loads that we found.
- for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
- if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
- Alias = true;
- break;
- }
- // We found a load that alias with this store. Stop the sequence.
- if (Alias)
+ // If we find a load that aliases with this store, stop the sequence.
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
+ [&](LSBaseSDNode* Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
@@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the constant store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
LastLegalType = i + 1;
}
}
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
+ FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ LastLegalVectorType = i + 1;
}
}
-
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if (NoVectors) {
- LastLegalVectorType = 0;
- } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
- LastLegalVectorType,
- FirstStoreAS)) {
- LastLegalVectorType = 0;
- }
-
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
@@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecEltSrc) {
- unsigned NumElem = 0;
+ if (IsExtractVecSrc) {
+ unsigned NumStoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
+ unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a vector.
// It should be possible to handle mixed sources, but load sources need
// more careful handling (see the block of code below that handles
// consecutive loads).
- if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
return false;
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
- NumElem = i + 1;
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ NumStoresToMerge = i + 1;
}
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
false, true);
}
@@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads much share the same chain.
+ // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
@@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
-
// Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
+ bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd)
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
- FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ IsFastLd)
LastLegalIntegerType = i+1;
}
}
@@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
+ // Collect the chains from all merged stores.
+ SmallVector<SDValue, 8> MergeStoreChains;
+ MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+
// The latest Node in the DAG.
unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
@@ -11147,6 +11663,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// latest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
LatestNodeUsed = i;
+
+ MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
}
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
@@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
SDValue NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+ SDValue NewStoreChain =
+ DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+
SDValue NewStore = DAG.getStore(
- LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
- // Replace one of the loads with the new load.
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
-
- // Remove the rest of the load chains.
- for (unsigned i = 1; i < NumElem ; ++i) {
- // Replace all chain users of the old load nodes with the chain of the new
- // load node.
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
}
// Replace the last store with the new store.
@@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return true;
}
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ ;
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ ;
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment(), AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
+
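
The MVT::f64 fallback in replaceStoreOfFPConstant above splits the double's bit pattern into two 32-bit halves and swaps them on big-endian targets. Below is a host-side sketch of just that bit manipulation, with an assumed endianness flag standing in for the DataLayout query; the real code then emits one i32 store at Ptr and one at Ptr+4.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <utility>

int main() {
  double D = 1.0;
  uint64_t Val;
  std::memcpy(&Val, &D, sizeof(Val)); // equivalent of bitcastToAPInt()

  uint32_t Lo = static_cast<uint32_t>(Val & 0xFFFFFFFF);
  uint32_t Hi = static_cast<uint32_t>(Val >> 32);

  bool IsBigEndian = false; // assumed; the real code queries the DataLayout
  if (IsBigEndian)
    std::swap(Lo, Hi); // the half stored at offset 0 must be the high word

  std::printf("i32 at Ptr:   0x%08x\ni32 at Ptr+4: 0x%08x\n", (unsigned)Lo,
              (unsigned)Hi);
  return 0;
}
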
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
return Chain;
- // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
- // NOTE: If the original store is volatile, this transform must not increase
- // the number of stores. For example, on x86-32 an f64 can be stored in one
- // processor operation but an i64 (which is not legal) requires two. So the
- // transform should not be done in this case.
- if (Value.getOpcode() != ISD::TargetConstantFP) {
- SDValue Tmp;
- switch (CFP->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unknown FP type");
- case MVT::f16: // We don't do this for these yet.
- case MVT::f80:
- case MVT::f128:
- case MVT::ppcf128:
- break;
- case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- ;
- Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), SDLoc(CFP),
- MVT::i32);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
- break;
- case MVT::f64:
- if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
- ;
- Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), SDLoc(CFP), MVT::i64);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
-
- if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- // Many FP stores are not made apparent until after legalize, e.g. for
- // argument passing. Since this is so common, custom legalize the
- // 64-bit integer store into two 32-bit stores.
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Lo, Hi);
-
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
-
- SDLoc DL(N);
-
- SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
- Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal,
- ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
- Alignment = MinAlign(Alignment, 4U);
- SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
- Ptr, ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal,
- Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- St0, St1);
- }
-
- break;
- }
- }
- }
-
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
@@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try transforming a pair floating point load / store ops to integer
// load / store ops.
- SDValue NewST = TransformFPLoadStorePair(N);
- if (NewST.getNode())
+ if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
@@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
UseAA = false;
#endif
if (UseAA && ST->isUnindexed()) {
- // Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
-
- // If there is a better chain.
- if (Chain != BetterChain) {
- SDValue ReplStore;
-
- // Replace the chain to avoid dependency.
- if (ST->isTruncatingStore()) {
- ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemoryVT(), ST->getMemOperand());
- } else {
- ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemOperand());
- }
+ // FIXME: We should do this even without AA enabled. AA will just allow
+ // FindBetterChain to work in more situations. The problem with this is that
+ // any combine that expects memory operations to be on consecutive chains
+ // first needs to be updated to look for users of the same chain.
- // Create token to keep both nodes around.
- SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
- MVT::Other, Chain, ReplStore);
-
- // Make sure the new and old chains are cleaned up.
- AddToWorklist(Token.getNode());
-
- // Don't add users to work list.
- return CombineTo(N, Token, false);
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handles all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
}
}
@@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return SDValue(N, 0);
}
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(Value)) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
return ReduceLoadOpStoreWidth(N);
}
@@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue EltNo = N->getOperand(1);
- bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (ConstEltNo &&
+ InVec.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VT) &&
+ (InVec.hasOneUse() ||
+ TLI.aggressivelyPreferBuildVectorSources(VT))) {
+ SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (NVT == InEltVT)
+ return Elt;
+
+ // TODO: It may be useful to truncate, when the truncation is free, if the
+ // build_vector implicitly converts.
+ }
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(Elt);
+ int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
@@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
}
-SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
- // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
- // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
- // inputs come from at most two distinct vectors, turn this into a shuffle
- // node.
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ // Peek through any bitcast.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+
+ // Peek through any bitcast.
+ while (ExtVec.getOpcode() == ISD::BITCAST)
+ ExtVec = ExtVec.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask);
+}
+
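
combineConcatVectorOfExtracts above contributes NumOpElts shuffle mask entries per concatenated operand, each offset by the operand's subvector start index, with entries that read from the second source vector shifted by NumElts. A standalone model of that mask construction follows; the operand descriptions and indices are hypothetical.

#include <cstdio>
#include <vector>

int main() {
  const int NumElts = 8;   // elements in the concat result (and each source)
  const int NumOpElts = 4; // elements per concatenated operand

  // Each operand is modelled as (source vector, starting element index).
  struct ExtractOp { int Source; int ExtIdx; };
  std::vector<ExtractOp> Ops = {{0, 4}, {1, 0}};

  std::vector<int> Mask;
  for (const ExtractOp &Op : Ops)
    for (int i = 0; i != NumOpElts; ++i)
      // Elements taken from the second source are numbered
      // NumElts .. 2*NumElts-1 in a two-input shuffle mask.
      Mask.push_back(i + Op.ExtIdx + (Op.Source == 1 ? NumElts : 0));

  for (int M : Mask)
    std::printf("%d ", M); // prints: 4 5 6 7 8 9 10 11
  std::printf("\n");
  return 0;
}
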
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
@@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
SVN->getMask().end(), [](int i) { return i == -1; })) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+ // Attempt to create a valid clear mask, splitting the mask into
+ // sub-elements and checking to see if each is all zeros or all ones -
+ // suitable for shuffle masking.
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ Indices.push_back(-1);
+ continue;
+ }
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (isAllOnesConstant(Elt))
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian()) {
+ Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ } else {
+ Bits = Bits.lshr(SubIdx * NumSubBits);
+ }
+
+ if (Split > 1)
+ Bits = Bits.trunc(NumSubBits);
+
+ if (Bits.isAllOnesValue())
Indices.push_back(i);
- else if (isNullConstant(Elt))
- Indices.push_back(NumElts+i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
- EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, dl, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
- }
+ SDValue Zero = DAG.getConstant(0, dl, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, &Indices[0]));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
return SDValue();
}
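
BuildClearMask above splits each constant lane of the AND mask into sub-elements and emits one shuffle index per sub-element: the corresponding LHS element when the bits are all ones, the zero vector when they are all zero, giving up on anything else. Below is a standalone model for 32-bit lanes split into bytes, assuming little-endian layout for brevity; the helper and its inputs are illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns a byte-level clear mask, or an empty vector if the fold fails.
std::vector<int> buildClearMask(const std::vector<uint32_t> &MaskElts) {
  const int Split = 4; // bytes per 32-bit lane
  const int NumSubElts = static_cast<int>(MaskElts.size()) * Split;
  std::vector<int> Indices;
  for (int i = 0; i != NumSubElts; ++i) {
    uint32_t Elt = MaskElts[i / Split];
    uint8_t Byte = (Elt >> ((i % Split) * 8)) & 0xFF; // little-endian extract
    if (Byte == 0xFF)
      Indices.push_back(i);              // keep this byte of the LHS
    else if (Byte == 0x00)
      Indices.push_back(i + NumSubElts); // take this byte from the zero vector
    else
      return {};                         // not a pure keep/clear mask
  }
  return Indices;
}

int main() {
  // AND mask <0x00FF00FF, 0xFFFF0000>: a mix of keep and clear bytes.
  for (int M : buildClearMask({0x00FF00FF, 0xFFFF0000}))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}
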
@@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
- // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
- // this operation.
- if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
- RHS.getOpcode() == ISD::BUILD_VECTOR) {
- // Check if both vectors are constants. If not bail out.
- if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
- cast<BuildVectorSDNode>(RHS)->isConstant()))
- return SDValue();
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- SDValue LHSOp = LHS.getOperand(i);
- SDValue RHSOp = RHS.getOperand(i);
-
- // Can't fold divide by zero.
- if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
- N->getOpcode() == ISD::FDIV) {
- if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
- break;
- }
-
- EVT VT = LHSOp.getValueType();
- EVT RVT = RHSOp.getValueType();
- if (RVT != VT) {
- // Integer BUILD_VECTOR operands may have types larger than the element
- // size (e.g., when the element type is not legal). Prior to type
- // legalization, the types may not match between the two BUILD_VECTORS.
- // Truncate one of the operands to make them match.
- if (RVT.getSizeInBits() > VT.getSizeInBits()) {
- RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
- } else {
- LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
- VT = RVT;
- }
- }
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
- LHSOp, RHSOp);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::Constant &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() == LHS.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
- }
-
// Type legalization might introduce new shuffles in the DAG.
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
@@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0));
+ LHS.getOperand(0), RHS.getOperand(0),
+ N->getFlags());
AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
@@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false,
- false, false, Alignment);
+ return DAG.getLoad(
+ TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
}
}
@@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Get a SetCC of the condition
// NOTE: Don't create a SETCC if it's not legal on this target.
if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC,
- LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
SDValue Temp, SCC;
// cast from setcc result type to select result type
if (LegalTypes) {
@@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
}
- // Check to see if this is the equivalent of setcc
- // FIXME: Turn all of these into setcc if setcc if setcc is legal
- // otherwise, go ahead with the folds.
- if (0 && isNullConstant(N3) && isOneConstant(N2)) {
- EVT XType = N0.getValueType();
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
- SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
- if (Res.getValueType() != VT)
- Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
- return Res;
- }
-
- // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
- if (isNullConstant(N1) && CC == ISD::SETEQ &&
- (!LegalOperations ||
- TLI.isOperationLegal(ISD::CTLZ, XType))) {
- SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
- return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
- DAG.getConstant(Log2_32(XType.getSizeInBits()),
- SDLoc(Ctlz),
- getShiftAmountTy(Ctlz.getValueType())));
- }
- // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
- if (isNullConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
- XType, DAG.getConstant(0, DL, XType), N0);
- SDValue NotN0 = DAG.getNOT(DL, N0, XType);
- return DAG.getNode(ISD::SRL, DL, XType,
- DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(XType)));
- }
- // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
- if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
- XType));
- }
- }
-
// Check to see if this is an integer abs.
// select_cc setg[te] X, 0, X, -X ->
// select_cc setgt X, -1, X, -X ->
@@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
}
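
The loop above refines a hardware reciprocal estimate with Est = Est + Est * (1 - Arg * Est); each iteration roughly doubles the number of correct bits. A scalar model of the same arithmetic, with an arbitrary starting estimate standing in for the target's estimate instruction:

#include <cstdio>

int main() {
  double Arg = 3.0;
  double Est = 0.3; // stand-in for the target's reciprocal-estimate result
  for (unsigned i = 0; i < 3; ++i) {
    double NewEst = Arg * Est; // FMUL
    NewEst = 1.0 - NewEst;     // FSUB
    NewEst = Est * NewEst;     // FMUL
    Est = Est + NewEst;        // FADD
    std::printf("iteration %u: Est = %.17g\n", i, Est);
  }
  // Est converges toward 1/3 = 0.333...
  return 0;
}
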
@@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
AddToWorklist(HalfArg.getNode());
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
@@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
AddToWorklist(HalfEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR ?
- BuildRsqrtNROneConst(Op, Est, Iterations) :
- BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
}
return Est;
}
@@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SDValue Chain = Chains.pop_back_val();
// For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
- // find more and revert to original chain since the xform is unlikely to be
- // profitable.
+ // chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
- if (Depth > 6 || Aliases.size() == 2) {
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;
@@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ return false;
+
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ ChainedStores.push_back(St);
+
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction of a different kind or
+ // one that has a different base pointer.
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ break;
+
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load, skip over it and keep
+ // walking up the chain; stop the walk at any other kind of node.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (true) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ ChainedStores.push_back(STn);
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = nullptr;
+ break;
+ }
+ }
+ }
+
+ bool MadeChange = false;
+ SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+
+ for (StoreSDNode *ChainedStore : ChainedStores) {
+ SDValue Chain = ChainedStore->getChain();
+ SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+
+ if (Chain != BetterChain) {
+ MadeChange = true;
+ BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
+ }
+ }
+
+ // Do all replacements only after all of them have been found, to avoid
+ // making the chains more complicated by introducing new TokenFactors.
+ for (auto Replacement : BetterChains)
+ replaceStoreChain(Replacement.first, Replacement.second);
+
+ return MadeChange;
+}
+
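
findBetterNeighborChains above walks up the chain from a store, collecting further stores to the same base pointer, skipping over intervening loads, and stopping at anything else. A much-simplified standalone model of that walk follows; Node is a made-up structure, and the real code also checks hasOneUse, volatility and indexing.

#include <cstdio>
#include <vector>

struct Node {
  enum Kind { Store, Load, Other } K;
  int Base;    // stand-in for the BaseIndexOffset base pointer
  Node *Chain; // the node this one is chained after
};

std::vector<Node *> collectChainedStores(Node *St) {
  std::vector<Node *> ChainedStores{St};
  Node *Index = St;
  while (Index) {
    Node *Next = Index->Chain;
    // Skip over loads; only another store can continue the walk.
    while (Next && Next->K == Node::Load)
      Next = Next->Chain;
    if (!Next || Next->K != Node::Store || Next->Base != St->Base)
      break;
    ChainedStores.push_back(Next);
    Index = Next;
  }
  return ChainedStores;
}

int main() {
  Node Entry{Node::Other, 0, nullptr};
  Node St0{Node::Store, 7, &Entry}; // same base as the starting store
  Node Ld{Node::Load, 9, &St0};     // unrelated load, skipped
  Node St1{Node::Store, 7, &Ld};    // same base
  Node St2{Node::Store, 7, &St1};   // the store the walk starts from

  std::printf("found %zu chained stores\n", collectChainedStores(&St2).size());
  return 0;
}
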
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2b9ba2c..cfbb209 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -118,9 +118,9 @@ bool FastISel::lowerArguments() {
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
E = FuncInfo.Fn->arg_end();
I != E; ++I) {
- DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
- FuncInfo.ValueMap[I] = VI->second;
+ FuncInfo.ValueMap[&*I] = VI->second;
}
return true;
}
@@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
// have to worry about calling conventions and target-specific lowering code.
// Instead we perform the call lowering right here.
//
- // CALLSEQ_START(0)
+ // CALLSEQ_START(0...)
// STACKMAP(id, nbytes, ...)
// CALLSEQ_END(0, 0)
//
@@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(0);
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
// Issue STACKMAP.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
- case Intrinsic::eh_actions: {
- unsigned ResultReg = getRegForValue(UndefValue::get(II->getType()));
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
@@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
+{
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
bool FastISel::selectInstruction(const Instruction *I) {
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I))
- if (!handlePHINodesInSuccessorBlocks(I->getParent()))
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
return false;
+ }
DbgLoc = I->getDebugLoc();
@@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
LibInfo->hasOptimizedCodeGen(Func))
return false;
- // Don't handle Intrinsic::trap if a trap funciton is specified.
+ // Don't handle Intrinsic::trap if a trap function is specified.
if (F && F->getIntrinsicID() == Intrinsic::trap &&
Call->hasFnAttr("trap-func-name"))
return false;
@@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I))
+ if (isa<TerminatorInst>(I)) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
return false;
}
@@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
- MSucc->getBasicBlock());
- FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as a successor unless it is equal to FalseMBB: this can
+ // happen in degenerate IR, and MachineIR forbids having a block appear
+ // twice in the successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, DbgLoc);
}
/// Emit an FNeg operation.
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, const ConstantFP *FPImm) {
+unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
@@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, uint64_t Imm1,
- uint64_t Imm2) {
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
- Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
@@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, uint64_t Imm1,
- uint64_t Imm2) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cc306cb..b62bd2b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
MachineModuleInfo &MMI = MF->getMMI();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- // Static allocas can be folded into the initial stack frame adjustment.
- if (AI->isStaticAlloca()) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
- Type *Ty = AI->getAllocatedType();
uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
- AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
-
} else {
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(
- AI->getAllocatedType()),
- AI->getAlignment());
- unsigned StackAlign =
- MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
@@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for inline asm that clobbers the SP register.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(I);
+ ImmutableCallSite CS(&*I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo()->setHasVAStart(true);
}
- // If we have a musttail call in a variadic funciton, we need to ensure we
+ // If we have a musttail call in a variadic function, we need to ensure we
// forward implicit register parameters.
if (const auto *CI = dyn_cast<CallInst>(I)) {
if (CI->isMustTailCall() && Fn->isVarArg())
@@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
- if (isUsedOutsideOfDefiningBlock(I))
- if (!isa<AllocaInst>(I) ||
- !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(I);
+ if (isUsedOutsideOfDefiningBlock(&*I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(&*I);
// Collect llvm.dbg.declare information. This is done now instead of
// during the initial isel pass through the IR so that it is done
@@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Decide the preferred extend type for a value.
- PreferredExtendType[I] = getPreferredExtendForValue(I);
+ PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
for (BB = Fn->begin(); BB != EB; ++BB) {
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
- MBBMap[BB] = MBB;
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB->isEHPad()) {
+ const Instruction *I = BB->getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create funclets, so we could avoid setting
+ // this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(I)) {
+ MMI.setHasEHFunclets(true);
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(I)) {
+ assert(&*BB->begin() == I &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(I))
+ assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
+ MBBMap[&*BB] = MBB;
MF->push_back(MBB);
// Transfer the address-taken flag. This is necessary because there could
@@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark landing pad blocks.
SmallVector<const LandingPadInst *, 4> LPads;
for (BB = Fn->begin(); BB != EB; ++BB) {
- if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
- MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
- if (BB->isLandingPad())
- LPads.push_back(BB->getLandingPadInst());
+ const Instruction *FNP = BB->getFirstNonPHI();
+ if (BB->isEHPad() && MBBMap.count(&*BB))
+ MBBMap[&*BB]->setIsEHPad();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
+ LPads.push_back(LPI);
}
- // If this is an MSVC EH personality, we need to do a bit more work.
- EHPersonality Personality = EHPersonality::Unknown;
- if (Fn->hasPersonalityFn())
- Personality = classifyEHPersonality(Fn->getPersonalityFn());
- if (!isMSVCEHPersonality(Personality))
+ // If this personality uses funclets, we need to do a bit more work.
+ if (!Fn->hasPersonalityFn())
+ return;
+ EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ if (!isFuncletEHPersonality(Personality))
return;
- if (Personality == EHPersonality::MSVC_Win64SEH ||
- Personality == EHPersonality::MSVC_X86SEH) {
- addSEHHandlersForLPads(LPads);
- }
-
- WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn);
- if (Personality == EHPersonality::MSVC_CXX) {
- const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
- calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo);
- }
-
- // Copy the state numbers to LandingPadInfo for the current function, which
- // could be a handler or the parent. This should happen for 32-bit SEH and
- // C++ EH.
- if (Personality == EHPersonality::MSVC_CXX ||
- Personality == EHPersonality::MSVC_X86SEH) {
- for (const LandingPadInst *LP : LPads) {
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]);
- }
- }
-}
-
-void FunctionLoweringInfo::addSEHHandlersForLPads(
- ArrayRef<const LandingPadInst *> LPads) {
- MachineModuleInfo &MMI = MF->getMMI();
-
- // Iterate over all landing pads with llvm.eh.actions calls.
- for (const LandingPadInst *LP : LPads) {
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LP->getNextNode());
- if (!ActionsCall ||
- ActionsCall->getIntrinsicID() != Intrinsic::eh_actions)
- continue;
-
- // Parse the llvm.eh.actions call we found.
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- SmallVector<std::unique_ptr<ActionHandler>, 4> Actions;
- parseEHActions(ActionsCall, Actions);
-
- // Iterate EH actions from most to least precedence, which means
- // iterating in reverse.
- for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) {
- ActionHandler *Action = I->get();
- if (auto *CH = dyn_cast<CatchHandler>(Action)) {
- const auto *Filter =
- dyn_cast<Function>(CH->getSelector()->stripPointerCasts());
- assert((Filter || CH->getSelector()->isNullValue()) &&
- "expected function or catch-all");
- const auto *RecoverBA =
- cast<BlockAddress>(CH->getHandlerBlockOrFunc());
- MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA);
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ calculateCatchReturnSuccessorColors(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.CatchObj.Alloca) {
+ assert(StaticAllocaMap.count(H.CatchObj.Alloca));
+ H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca];
} else {
- assert(isa<CleanupHandler>(Action));
- const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc());
- MMI.addSEHCleanupHandler(LPadMBB, Fini);
+ H.CatchObj.FrameIndex = INT_MAX;
}
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
}
}
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
+ }
}
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
void FunctionLoweringInfo::clear() {
- assert(CatchInfoFound.size() == CatchInfoLost.size() &&
- "Not all catch info was assigned to a landing pad!");
-
MBBMap.clear();
ValueMap.clear();
StaticAllocaMap.clear();
-#ifndef NDEBUG
- CatchInfoLost.clear();
- CatchInfoFound.clear();
-#endif
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
@@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
return 0;
}
+unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ unsigned &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
/// being passed to this variadic function, and set the MachineModuleInfo's
/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
@@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
MachineBasicBlock *MBB) {
- MMI.addPersonality(
- MBB,
- cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()));
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MMI.addPersonality(PF);
if (I.isCleanup())
MMI.addCleanup(MBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 5ec1030..a1e2d41 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC);
+ TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fbc8f1e..f46767f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -39,6 +39,10 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag"
+namespace {
+
+struct FloatSignAsInt;
+
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
@@ -51,7 +55,6 @@ using namespace llvm;
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt merge setcc and brc instructions into brcc's.
///
-namespace {
class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
@@ -130,7 +133,11 @@ private:
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFCOPYSIGN(SDNode *Node);
+ void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
SDLoc dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
@@ -138,6 +145,7 @@ private:
SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
SDLoc dl);
+ SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl);
SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
@@ -146,10 +154,11 @@ private:
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
- std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
-
- void ExpandNode(SDNode *Node);
+ // If ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall.
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
void PromoteNode(SDNode *Node);
public:
@@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend) {
- SDValue Result =
- DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, false, Alignment);
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ false, false, false, Alignment);
return Result;
}
SDValue Result =
- DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false, false,
- Alignment);
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
return Result;
}
@@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Store the vector.
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, 0);
// Truncate or zero extend offset to target pointer type.
- unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
// Add the offset to the index.
unsigned EltSize = EltVT.getSizeInBits()/8;
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
@@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
- return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false,
- false, 0);
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI),
+ false, false, false, 0);
}
@@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
case TargetLowering::Legal: {
// If this is an unaligned store and the target doesn't support it,
// expand it.
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
case TargetLowering::Custom: {
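The store and load hunks in this file all replace the same open-coded alignment test. A hedged sketch (hypothetical helper, not the LLVM API itself) of the predicate that the new TLI.allowsMemoryAccess() call evaluates in one step, assuming an access is acceptable when either the target tolerates misalignment or the supplied alignment already meets the type's ABI alignment:

// Hypothetical predicate standing in for the combined check: replaces the
// old "if misaligned accesses are not allowed, compare Align against the
// ABI alignment of the memory type" sequence.
static bool accessIsSupported(bool TargetAllowsMisaligned,
                              unsigned ABIAlignment, unsigned Align) {
  return TargetAllowsMisaligned || Align >= ABIAlignment;
}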
@@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
ReplaceNode(SDValue(Node, 0), Result);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
- StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DL.getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
case TargetLowering::Custom: {
@@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
- }
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
break;
}
case TargetLowering::Custom: {
@@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Res.getValue(1);
}
} else {
- // If this is an unaligned load and the target doesn't support
- // it, expand it.
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
- }
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Load.getValue(1);
break;
}
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+ Chain, Ptr, ISrcVT,
+ LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
}
assert(!SrcVT.isVector() &&
@@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal &&
+ assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Node->getValueType(i))) &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
- assert((TLI.getTypeAction(*DAG.getContext(),
- Op.getValueType()) == TargetLowering::TypeLegal ||
- Op.getOpcode() == ISD::TargetConstant) &&
- "Unexpected illegal type!");
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Op.getValueType()) ||
+ Op.getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
#endif
// Figure out the correct action; the way to query this varies by opcode
@@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
case ISD::VAARG:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getValueType(0));
@@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ Node->getOpcode() == ISD::SETCC ? 2 :
+ Node->getOpcode() == ISD::SETCCE ? 3 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
@@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
case ISD::READ_REGISTER:
case ISD::WRITE_REGISTER:
// Named register is legal in the DAG, but blocked by register name
@@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Node);
+ if (ExpandNode(Node))
+ return;
+ // FALL THROUGH
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
return;
case TargetLowering::Promote:
PromoteNode(Node);
@@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
// the vector. If all are expanded here, we don't want one store per vector
// element.
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
@@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
continue;
+ // If the index is dependent on the store we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load).
+ if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist))
+ continue;
+
StackPtr = ST->getBasePtr();
Ch = SDValue(ST, 0);
break;
@@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
@@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDLoc dl(Node);
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
false, false, false, 0);
}
-SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
- SDLoc dl(Node);
- SDValue Tmp1 = Node->getOperand(0);
- SDValue Tmp2 = Node->getOperand(1);
-
- // Get the sign bit of the RHS. First obtain a value that has the same
- // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
- SDValue SignBit;
- EVT FloatVT = Tmp2.getValueType();
- EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+namespace {
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+};
+}
+
+/// Bitcast a floating-point value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floating-point value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ SDLoc DL, SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) {
- // Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignBit(NumBits);
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo, false, false, 0);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
} else {
- auto &DL = DAG.getDataLayout();
- // Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getPointerTy(DL);
- // First create a temporary that is aligned for both the load and store.
- SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
- // Then store the float to it.
- SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
- false, false, 0);
- if (DL.isBigEndian()) {
- assert(FloatVT.isByteSized() && "Unsupported floating point type!");
- // Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
- } else { // Little endian
- SDValue LoadPtr = StackPtr;
- // The float may be wider than the integer we are going to load. Advance
- // the pointer so that the loaded integer will contain the sign bit.
- unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
- unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
- LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
- DAG.getConstant(ByteOffset, dl,
- LoadPtr.getValueType()));
- // Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, false, 0);
- // Move the sign bit to the top bit of the loaded integer.
- unsigned BitShift = LoadTy.getSizeInBits() -
- (FloatVT.getSizeInBits() - 8 * ByteOffset);
- assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
- if (BitShift)
- SignBit = DAG.getNode(
- ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift, dl,
- TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
- }
+ // Advance the pointer so that the loaded byte will contain the sign bit.
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
+ IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
}
- // Now get the sign bit proper, by seeing whether the value is negative.
- SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
- SignBit,
- DAG.getConstant(0, dl, SignBit.getValueType()),
- ISD::SETLT);
- // Get the absolute value of the result.
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
- // Select between the nabs and abs value based on the sign bit of
- // the input.
- return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
- DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
- AbsVal);
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain,
+ IntPtr, State.IntPointerInfo, MVT::i8,
+ false, false, false, 0);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ SDLoc DL, SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8, false, false,
+ 0);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo, false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS is legal, transform FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform values to integer, copy the sign bit and transform back.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ assert(SignAsInt.SignMask == MagAsInt.SignMask);
+ SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
+ ClearSignMask);
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
+
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
}
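For reference, the integer path of ExpandFCOPYSIGN above amounts to the classic bit trick: clear the sign bit of the magnitude value and OR in the sign bit of the sign value. A small host-side sketch for a 32-bit IEEE-754 float, using memcpy as the bitcast (illustration only, not the DAG code):

#include <cstdint>
#include <cstring>

// Copy the sign bit of Sign into Mag, leaving Mag's magnitude untouched.
float copysignViaBits(float Mag, float Sign) {
  uint32_t MagBits, SignBits;
  std::memcpy(&MagBits, &Mag, sizeof(float));    // "bitcast" float -> int
  std::memcpy(&SignBits, &Sign, sizeof(float));
  const uint32_t SignMask = 0x80000000u;         // APInt::getSignBit(32)
  uint32_t Result = (MagBits & ~SignMask) | (SignBits & SignMask);
  std::memcpy(&Mag, &Result, sizeof(float));     // bitcast back to float
  return Mag;
}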
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
int SPFI = StackPtrFI->getIndex();
- SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
- StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- Node->getValueType(0).getVectorElementType(),
- false, false, 0);
- return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, false, 0);
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType(), false, false, 0);
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, false, 0);
}
static bool
@@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx =
DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
}
SmallSet<SDValue, 16> DefinedValues;
@@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// Return true if divmod libcall is available.
-static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
- const TargetLowering &TLI) {
- RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
- case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
- case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
- case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
- case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
- }
-
- return TLI.getLibcallName(LC) != nullptr;
-}
-
-/// Only issue divrem libcall if both quotient and remainder are needed.
-static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
- // The other use might have been replaced with a divrem already.
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- unsigned OtherOpcode = 0;
- if (isSigned)
- OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
- else
- OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
-
- SDValue Op0 = Node->getOperand(0);
- SDValue Op1 = Node->getOperand(1);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == Node)
- continue;
- if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
- User->getOperand(0) == Op0 &&
- User->getOperand(1) == Op1)
- return true;
- }
- return false;
-}
-
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
EVT DestVT,
SDLoc dl) {
+ // TODO: Should any fast-math-flags be set for the created nodes?
+
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Alignment = std::min(Alignment, 4u);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
- FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
else {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, false, Alignment);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
+/// Open code the operations for BITREVERSE.
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2;
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift = Shift.shl(J);
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
+
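The ExpandBITREVERSE loop above moves bit I of the input to bit J = Sz-1-I of the result using one shift/AND/OR triple per bit. A scalar C++ equivalent for a 32-bit value (an illustrative sketch, not the DAG-level code):

#include <cstdint>

// Bit I of X is masked into bit J = 31 - I of the result.
uint32_t reverseBits32(uint32_t X) {
  uint32_t R = 0;
  for (unsigned I = 0, J = 31; I < 32; ++I, --J) {
    uint32_t Tmp = (I < J) ? (X << (J - I)) : (X >> (I - J));
    R |= Tmp & (1u << J);   // keep only bit J, which originated as bit I
  }
  return R;
}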
/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
@@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
}
-std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
- unsigned Opc = Node->getOpcode();
- MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
- return ExpandChainLibCall(LC, Node, false);
-}
-
-void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::BITREVERSE:
+ Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ break;
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
break;
@@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
case ISD::EH_SJLJ_SETJMP:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.push_back(DAG.getConstant(0, dl, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
- case ISD::ATOMIC_FENCE: {
- // If the target didn't lower this, lower it to '__sync_synchronize()' call
- // FIXME: handle "fence singlethread" more efficiently.
- TargetLowering::ArgListTy Args;
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
@@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Swap.getValue(1));
break;
}
- // By default, atomic intrinsics are marked Legal and lowered. Targets
- // which don't support them directly, however, may want libcalls, in which
- // case they mark them Expand, and we get here.
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_CMP_SWAP: {
- std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
- Results.push_back(Tmp.first);
- Results.push_back(Tmp.second);
- break;
- }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
// Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
// splits out the success value as a comparison. Expanding the resulting
@@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
- case ISD::TRAP: {
- // If this operation is not supported, lower it to 'abort()' call
- TargetLowering::ArgListTy Args;
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::FP_ROUND:
case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
@@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0),
Tmp1, ISD::SETLT);
True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ // TODO: Should any fast-math-flags be set for the FSUB?
False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
DAG.getNode(ISD::FSUB, dl, VT,
Node->getOperand(0), Tmp1));
@@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::VAARG: {
- const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
-
- SDValue VAListLoad =
- DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, false, 0);
- SDValue VAList = VAListLoad;
-
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
- VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1, dl,
- VAList.getValueType()));
-
- VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align, dl,
- VAList.getValueType()));
- }
-
- // Increment the pointer, VAList, to the next vaarg
- Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(
- VT.getTypeForEVT(*DAG.getContext())),
- dl, VAList.getValueType()));
- // Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
- MachinePointerInfo(V), false, false, 0);
- // Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, false, 0));
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
break;
- }
- case ISD::VACOPY: {
- // This defaults to loading a pointer from the input and storing it to the
- // output, returning the chain.
- const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
- const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
- Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl,
- Node->getOperand(0), Node->getOperand(2),
- MachinePointerInfo(VS), false, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
- MachinePointerInfo(VD), false, false, 0);
- Results.push_back(Tmp1);
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
break;
- }
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
}
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
case ISD::FCOPYSIGN:
Results.push_back(ExpandFCOPYSIGN(Node));
break;
case ISD::FNEG:
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
Node->getOperand(0));
Results.push_back(Tmp1);
break;
- case ISD::FABS: {
- // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = DAG.getConstantFP(0.0, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, ISD::SETUGT);
- Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
- Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
- Results.push_back(Tmp1);
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
break;
- }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
- case ISD::FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
- break;
- case ISD::FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
- break;
- case ISD::FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
- break;
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
- bool isSIN = Node->getOpcode() == ISD::FSIN;
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
@@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
- if (!isSIN)
+ if (Node->getOpcode() == ISD::FCOS)
Tmp1 = Tmp1.getValue(1);
Results.push_back(Tmp1);
- } else if (isSIN) {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
}
break;
}
- case ISD::FSINCOS:
- // Expand into sincos libcall.
- ExpandSinCosLibCall(Node, Results);
- break;
- case ISD::FLOG:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
- break;
- case ISD::FLOG2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
- break;
- case ISD::FLOG10:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
- break;
- case ISD::FEXP:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
- break;
- case ISD::FEXP2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
- break;
- case ISD::FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
- break;
- case ISD::FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
- break;
- case ISD::FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
- break;
- case ISD::FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
- break;
- case ISD::FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
- break;
- case ISD::FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
- break;
- case ISD::FPOWI:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
- break;
- case ISD::FPOW:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
- break;
- case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
- break;
- case ISD::FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
- break;
- case ISD::FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
- break;
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
- case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
- break;
- case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
- break;
- case ISD::FP16_TO_FP: {
- if (Node->getValueType(0) == MVT::f32) {
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
- break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
-
- // We can extend to types bigger than f32 in two steps without changing the
- // result. Since "f16 -> f32" is much more commonly available, give CodeGen
- // the option of emitting that before resorting to a libcall.
- SDValue Res =
- DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
- Results.push_back(
- DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
- }
- case ISD::FP_TO_FP16: {
+ case ISD::FP_TO_FP16:
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl));
Results.push_back(
DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
- break;
}
}
-
- RTLIB::Libcall LC =
- RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
- Results.push_back(ExpandLibCall(LC, Node, false));
break;
- }
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
@@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
Results.push_back(Tmp1);
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
}
break;
}
@@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- // If div is legal, it's better to do the normal expansion
- !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
- useDivRem(Node, isSigned, false))) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ Results.push_back(Tmp1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
case ISD::UDIV:
@@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool isSigned = Node->getOpcode() == ISD::SDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
- SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- useDivRem(Node, isSigned, true)))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
- else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
case ISD::MULHU:
@@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1.getValue(1));
break;
}
- case ISD::SDIVREM:
- case ISD::UDIVREM:
- // Expand into divrem libcall
- ExpandDivRemLibCall(Node, Results);
- break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
- break;
}
-
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128);
- Results.push_back(Tmp1);
break;
}
case ISD::SADDO:
@@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(), MemVT,
- false, false, false, 0);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT,
+ false, false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
@@ -4092,16 +3948,276 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
+ if (Results.empty())
+ return false;
+
+ ReplaceNode(Node, Results.data());
+ return true;
+}
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ }
+ break;
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
+ case ISD::FSUB:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ }
+
+ // Replace the original node with the legalized result.
if (!Results.empty())
ReplaceNode(Node, Results.data());
}
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
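+// For example, when promoting v2i64 to v4i32, EltVT is i64 and NewEltVT is
+// i32; the ratio is 64 / 32 = 2, so the returned MidVT is v2i32, i.e. the
+// slice of the promoted vector that stands in for one original element.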
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
+
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
- Node->getOpcode() == ISD::SETCC) {
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::BR_CC)
@@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FCOPYSIGN:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
- Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
@@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
case ISD::FFLOOR:
@@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1))))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
}
// Replace the original node with the legalized result.
@@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() {
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
- SDNode *N = NI;
+ SDNode *N = &*NI;
if (N->use_empty() && N != getRoot().getNode()) {
++NI;
DeleteNode(N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3c50a41..6c0193a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Result Float to Integer Conversion.
+// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
-void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::Register:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ assert(isLegalInHWReg(N->getValueType(ResNo)) &&
+ "Unsupported SoftenFloatRes opcode!");
+ // Only when isLegalInHWReg is true can we skip checking the operands.
+ R = SDValue(N, ResNo);
+ break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP:
- R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
- break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
@@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
}
// If R is null, the sub-method took care of registering the result.
- if (R.getNode())
+ if (R.getNode()) {
SetSoftenedFloat(SDValue(N, ResNo), R);
+ ReplaceSoftenFloatResult(N, ResNo, R);
+ }
+ // Return true only if the node was changed, assuming that the operands are
+ // also converted when necessary.
+ // Otherwise, return false to tell the caller to scan the operands.
+ return R.getNode() && R.getNode() != N;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
return BitConvertToInteger(N->getOperand(0));
}
@@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
- return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N),
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, it is better to load the value directly from the
+ // constant pool.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0)));
+ CN->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FABS can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
@@ -178,7 +199,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
@@ -191,7 +212,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -203,10 +224,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
@@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
@@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
@@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
@@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
@@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
@@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
@@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
@@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
@@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
@@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, dl).first;
+ NVT, Ops, false, dl).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SoftenFloatResult(Op.getNode(), 0);
}
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
false, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
@@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
@@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
@@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
@@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
@@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
@@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
@@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
+ bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ if (N != NewL.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
return NewL;
}
@@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
- return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ if (LegalInHWReg)
+ return ExtendNode;
+ return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
return NewVAARG;
}
@@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, Signed, dl).first;
+ Op, Signed, dl).first;
}
//===----------------------------------------------------------------------===//
-// Operand Float to Integer Conversion..
+// Convert Float Operand to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
+ if (CanSkipSoftenFloatOperand(N, OpNo))
+ return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::STORE:
+ Res = SoftenFloatOp_STORE(N, OpNo);
+ // Do not try to analyze or soften this node again if the value is
+ // or can be held in a register. In that case, Res.getNode() should
+ // be equal to N.
+ if (Res.getNode() == N &&
+ isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // Otherwise, we need to reanalyze and lower the new Res nodes.
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
- // core about this.
+ // core about this to re-analyze.
if (Res.getNode() == N)
return true;
@@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return false;
}
+bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // When the operand type can be kept in registers, SoftenFloatResult
+ // will call ReplaceValueWith to replace all references and we can
+ // skip softening this operand.
+ switch (N->getOperand(OpNo).getOpcode()) {
+ case ISD::BITCAST:
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ return true;
+ }
+ // For some opcodes, SoftenFloatResult handles all the work of softening and
+ // replacing the operands, so there is no need to soften the operands again,
+ // although such a node could still be scanned for other illegal operands.
+ switch (N->getOpcode()) {
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ return true;
+ }
+ return false;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
@@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
@@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT SVT = N->getOperand(0).getValueType();
EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
-}
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the result is not legal, eg: fp -> i1, then it needs to be promoted to
+ // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
+ // match, eg. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
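+ // For example, an f32 -> i8 conversion will typically end up using the
+ // f32 -> i32 libcall and truncating the i32 result back down to i8.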
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
- EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+
+ // Truncate the result if the libcall returns a larger type.
+ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, 3, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first;
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
break;
}
+ // TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
APInt(128, Parts)),
@@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
DAG.getNode(ISD::ADD, dl, MVT::i32,
DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
@@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
false, dl).first;
}
@@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
}
SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9f060a09..cd114d6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
- case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
- case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
@@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+ case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+ case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -180,7 +185,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
Op2, N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
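+ // Reversing in the promoted (wider) type leaves the interesting bits in the
+ // high end of the result, so shift them back down by the width difference.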
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
@@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getMemoryVT(), N->getMemOperand());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
- SDValue Mask = N->getMask();
- EVT NewMaskVT = getSetCCResultType(NVT);
- if (NewMaskVT != N->getMask().getValueType())
- Mask = PromoteTargetBoolean(Mask, NewMaskVT);
SDLoc dl(N);
-
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtSrc0, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
+ assert(NVT == ExtSrc0.getValueType() &&
+ "Gather result type and the passThru agrument type should be the same");
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ N->getIndex()};
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
+
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
@@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
- // Sign extend the input.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
@@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
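+ // Signed operations such as SDIV, SREM, SMIN and SMAX look at the whole
+ // value, so the promoted operands must be sign-extended rather than left
+ // with arbitrary high bits.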
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
return Mul;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
- // Zero extend the input.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
@@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- EVT MaskVT = Mask.getValueType();
SDLoc dl(N);
bool TruncateStore = false;
- if (!TLI.isTypeLegal(DataVT)) {
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
- DataOp = GetPromotedInteger(DataOp);
- if (!TLI.isTypeLegal(MaskVT))
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
- TruncateStore = true;
- }
+ if (OpNo == 2) {
+ // The mask comes before the data operand. If the data operand is legal, we
+ // just promote the mask.
+ // When the data operand has an illegal type, we should legalize the data
+ // operand first. The mask will then be promoted/split/widened according to
+ // the data operand type.
+ if (TLI.isTypeLegal(DataVT))
+ Mask = PromoteTargetBoolean(Mask, DataVT);
else {
- assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
- "Unexpected data legalization in MSTORE");
- DataOp = GetWidenedVector(DataOp);
-
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(DataOp.getValueType());
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
+ else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ else {
+ assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
}
}
+ } else { // Data operand
+ assert(OpNo == 3 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
}
- else
- Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
+
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
TruncateStore);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
assert(OpNo == 2 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
@@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo)
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
@@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
@@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
-void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi) {
- SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
- SplitInteger(Res, Lo, Hi);
-}
-
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
}
}
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
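+ // For a value split as (Hi << N) | Lo, bitreverse of the whole value is
+ // (bitreverse(Lo) << N) | bitreverse(Hi), hence the swapped halves below.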
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
}
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
}
@@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
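The rebuilt READCYCLECOUNTER above returns the counter as two target-sized halves (results 0 and 1) plus a chain (result 2), and the old chain users are redirected with ReplaceValueWith. On a 32-bit target the halves are later stitched back together in the obvious way; a trivial sketch with a hypothetical helper name:

    #include <cstdint>

    // Lo is result 0 and Hi is result 1 of the rebuilt node; joining them
    // reconstitutes the original i64 cycle-counter value.
    static uint64_t joinParts(uint32_t Lo, uint32_t Hi) {
      return (uint64_t(Hi) << 32) | Lo;
    }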
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo,
- Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
return;
}
@@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
@@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = Tmp1;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCE if the target supports it.
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (TLI.getOperationAction(
+ ISD::SETCCE,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
+ TargetLowering::Custom) {
+ // SETCCE can detect < and >= directly. For > and <=, flip operands and
+ // condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCE. The SETCCE operation is essentially looking at the high part of
+ // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
+ // positive iff LHS >= RHS.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
+ SDValue Res =
+ DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, ISD::SETEQ, false,
DagCombineInfo, dl);
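To make the SUBC/SETCCE reasoning above concrete outside the DAG: an unsigned double-width "less than" needs only the borrow out of the low-half subtraction plus a comparison of the high halves. A plain-integer sketch with a hypothetical helper, assuming 32-bit halves:

    #include <cstdint>

    static bool ult64(uint32_t LHSLo, uint32_t LHSHi,
                      uint32_t RHSLo, uint32_t RHSHi) {
      bool Borrow = LHSLo < RHSLo;   // borrow out of the low SUBC
      // The high half of LHS - RHS is negative exactly when it also needs a
      // borrow, i.e. LHSHi < RHSHi + Borrow; that is what SETCCE inspects.
      return LHSHi < RHSHi || (LHSHi == RHSHi && Borrow);
    }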
@@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
DAG.getCondCode(CCCode)), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a SUBE for the low part and a smaller SETCCE for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
@@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
- FudgePtr,
- MachinePointerInfo::getConstantPool(),
- MVT::f32,
- false, false, false, Alignment);
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
@@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 54cfaf5..2a0b0aa 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// (for example because it was created but not used). In general, we cannot
// distinguish between new nodes and deleted nodes.
SmallVector<SDNode*, 16> NewNodes;
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
// Remember nodes marked NewNode - they are subject to extra checking below.
- if (I->getNodeId() == NewNode)
- NewNodes.push_back(I);
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
- for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
- SDValue Res(I, i);
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
bool Failed = false;
unsigned Mapped = 0;
if (ReplacedValues.find(Res) != ReplacedValues.end()) {
Mapped |= 1;
// Check that remapped values are only used by nodes marked NewNode.
- for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
UI != UE; ++UI)
if (UI.getUse().getResNo() == i)
assert(UI->getNodeId() == NewNode &&
@@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
if (WidenedVectors.find(Res) != WidenedVectors.end())
Mapped |= 128;
- if (I->getNodeId() != Processed) {
+ if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
// marked NewNode too, since a deleted node may have been reallocated as
// another node that has not been seen by the LegalizeTypes machinery.
- if ((I->getNodeId() == NewNode && Mapped > 1) ||
- (I->getNodeId() != NewNode && Mapped != 0)) {
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
@@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() {
// Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
// (and remembering them) if they are leaves and assigning 'Unanalyzed' if
// non-leaves.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
- if (I->getNumOperands() == 0) {
- I->setNodeId(ReadyToProcess);
- Worklist.push_back(I);
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
} else {
- I->setNodeId(Unanalyzed);
+ Node.setNodeId(Unanalyzed);
}
}
@@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- SoftenFloatResult(N, i);
- Changed = true;
- goto NodeDone;
+ Changed = SoftenFloatResult(N, i);
+ if (Changed)
+ goto NodeDone;
+ // If not changed, the result type should be legal in a register.
+ assert(isLegalInHWReg(ResultVT) &&
+ "Unchanged SoftenFloatResult should be legal in register!");
+ goto ScanOperands;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -409,40 +411,48 @@ NodeDone:
// In a debug build, scan all the nodes to make sure we found them all. This
// ensures that there are no cycles and that everything got processed.
#ifndef NDEBUG
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
bool Failed = false;
// Check that all result types are legal.
- if (!IgnoreNodeResults(I))
- for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(I->getValueType(i))) {
- dbgs() << "Result type " << i << " illegal!\n";
+ // A value type is illegal if its TypeAction is not TypeLegal
+ // and TLI.RegClassForVT has no register class for the type.
+ // For example, the x86_64 target marks f128 as not TypeLegal so that its
+ // operators are softened, but it also has an FR128 register class for
+ // passing and returning f128 values. Hence a legalized node can have f128 type.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i)) &&
+ !TLI.isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump();
Failed = true;
}
// Check that all operand types are legal.
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
- if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
- !isTypeLegal(I->getOperand(i).getValueType())) {
- dbgs() << "Operand type " << i << " illegal!\n";
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType()) &&
+ !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump();
Failed = true;
}
- if (I->getNodeId() != Processed) {
- if (I->getNodeId() == NewNode)
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
dbgs() << "New node not analyzed?\n";
- else if (I->getNodeId() == Unanalyzed)
+ else if (Node.getNodeId() == Unanalyzed)
dbgs() << "Unanalyzed node not noticed?\n";
- else if (I->getNodeId() > 0)
+ else if (Node.getNodeId() > 0)
dbgs() << "Operand not processed?\n";
- else if (I->getNodeId() == ReadyToProcess)
+ else if (Node.getNodeId() == ReadyToProcess)
dbgs() << "Not added to worklist?\n";
Failed = true;
}
if (Failed) {
- I->dump(&DAG); dbgs() << "\n";
+ Node.dump(&DAG); dbgs() << "\n";
llvm_unreachable(nullptr);
}
}
@@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ // x86_64's f128 can be kept in SSE registers,
+ // but is sometimes softened to i128.
+ assert((Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
SDValue &OpEntry = SoftenedFloats[Op];
- assert(!OpEntry.getNode() && "Node is already converted to integer!");
+ // Allow repeated calls to record f128-typed nodes,
+ // or any node whose type transforms to itself.
+ // Many operations on these types are not softened.
+ assert((!OpEntry.getNode() ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ "Node is already converted to integer!");
OpEntry = Result;
}
@@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0),
- &Ops[0], NumOps, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
@@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
+/// WidenTargetBoolean - Widen the given target boolean to a target boolean
+/// of the given type. The boolean vector is widened and then promoted to match
+/// the target boolean type of the given ValVT.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+ bool WithZeroes) {
+ SDLoc dl(Bool);
+ EVT BoolVT = Bool.getValueType();
+
+ assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+ TLI.isTypeLegal(ValVT) &&
+ "Unexpected types in WidenTargetBoolean");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+ ValVT.getVectorNumElements());
+ Bool = ModifyToType(Bool, WideVT, WithZeroes);
+ return PromoteTargetBoolean(Bool, ValVT);
+}
+
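Conceptually, WidenTargetBoolean just pads the mask out to the wide element count and then performs the usual boolean promotion. A rough model of the padding step using ordinary containers rather than SelectionDAG types (names are illustrative):

    #include <vector>

    static std::vector<bool> widenMask(const std::vector<bool> &Mask,
                                       unsigned WideNumElts, bool WithZeroes) {
      std::vector<bool> Wide(Mask);
      // The DAG fills the new trailing lanes with zeroes when WithZeroes is
      // set and with undef otherwise; this plain model clears them either way.
      (void)WithZeroes;
      Wide.resize(WideNumElts, false);
      return Wide;
    }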
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
/// bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d1131a7..8ba19f7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -72,6 +72,20 @@ private:
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
+ /// isSimpleLegalType - Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ /// isLegalInHWReg - Return true if this type can be passed in registers.
+ /// For example, x86_64's f128 should be legal in registers,
+ /// with only some operations converted to library calls or integer
+ /// bitwise operations.
+ bool isLegalInHWReg(EVT VT) const {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return VT == NVT && isSimpleLegalType(VT);
+ }
+
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -173,6 +187,11 @@ private:
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ /// Modify the bit vector to match the SetCC result type of ValVT.
+ /// The bit vector is widened with zeroes when WithZeroes is true.
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
+
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -234,6 +253,7 @@ private:
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
@@ -246,21 +266,22 @@ private:
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_SDIV(SDNode *N);
SDValue PromoteIntRes_SELECT(SDNode *N);
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_UDIV(SDNode *N);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
@@ -276,7 +297,6 @@ private:
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
@@ -284,7 +304,6 @@ private:
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
@@ -294,6 +313,8 @@ private:
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -312,8 +333,6 @@ private:
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
- void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -322,6 +341,7 @@ private:
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -333,6 +353,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -354,12 +375,10 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
- SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
- SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -375,32 +394,48 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSoftenedFloat - Given a processed operand Op which was converted to an
- /// integer of the same size, this returns the integer. The integer contains
- /// exactly the same bits as Op - only the type changed. For example, if Op
- /// is an f32 which was softened to an i32, then this method returns an i32,
- /// the bits of which coincide with those of Op.
+ /// GetSoftenedFloat - Given an operand Op of float type, returns the integer
+ /// it was converted to when Op is not supported in the target HW.
+ /// The integer contains exactly the same bits as Op - only the type changed.
+ /// For example, if Op is an f32 which was softened to an i32, then this method
+ /// returns an i32, the bits of which coincide with those of Op.
+ /// If the Op can be efficiently supported in target HW or the operand must
+ /// stay in a register, the Op is not converted to an integer.
+ /// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
SDValue &SoftenedOp = SoftenedFloats[Op];
+ if (!SoftenedOp.getNode() &&
+ isSimpleLegalType(Op.getValueType()))
+ return Op;
RemapValue(SoftenedOp);
assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
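The lookup policy in GetSoftenedFloat can be modelled with an ordinary map: when no softened copy was recorded and the value's type is already legal in a hardware register (for example x86_64's f128 in FR128), the original value is returned untouched. A sketch under those assumptions, with hypothetical types and names:

    #include <cassert>
    #include <map>
    #include <string>

    struct Val { std::string Name; bool LegalInHWReg; };

    static Val getSoftened(const std::map<std::string, Val> &Softened,
                           const Val &Op) {
      auto It = Softened.find(Op.Name);
      if (It == Softened.end() && Op.LegalInHWReg)
        return Op;                      // e.g. an f128 kept in an FR128 register
      assert(It != Softened.end() && "Operand wasn't converted to integer?");
      return It->second;                // the recorded integer-typed replacement
    }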
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Result Float to Integer Conversion.
- void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+ // When the result type can be kept in HW registers, the converted
+ // NewRes node could have the same type. We can save the effort of cloning
+ // every user of N in SoftenFloatOperand or other legalization functions
+ // by calling ReplaceValueWith here to update all users.
+ if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+ ReplaceValueWith(SDValue(N, ResNo), NewRes);
+ }
+
+ // Convert Float Results to Integer for Non-HW-supported Operations.
+ bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
- SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -412,7 +447,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -425,21 +460,25 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N);
- SDValue SoftenFloatRes_SELECT(SDNode *N);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Operand Float to Integer Conversion.
+ // Return true if we can skip softening the given operand or SDNode because
+ // it was softened before by SoftenFloatResult and references to the operand
+ // were replaced by ReplaceValueWith.
+ bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+ // Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -575,7 +614,6 @@ private:
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
- SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
@@ -617,20 +655,18 @@ private:
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
@@ -650,6 +686,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -680,8 +717,8 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
- SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
@@ -693,6 +730,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
@@ -707,9 +745,11 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
@@ -745,8 +785,10 @@ private:
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
- SDValue ModifyToType(SDValue InOp, EVT WidenVT);
-
+ /// When FillWithZeroes is true, the vector will be widened with
+ /// zeroes.
+ /// By default, the vector will be widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 14d8f77..593c346 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
- case TargetLowering::TypeSoftenFloat:
- // Convert the integer operand instead.
- SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ case TargetLowering::TypeSoftenFloat: {
+ // Expand the floating point operand only if it was converted to integers.
+ // Otherwise, it is a legal type like f128 that can be saved in a register.
+ auto SoftenedOp = GetSoftenedFloat(InOp);
+ if (SoftenedOp == InOp)
+ break;
+ SplitInteger(SoftenedOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
NOutVT.getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
// Emit a store to the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 83d4ad5..f61f631 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,8 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -159,7 +161,7 @@ bool VectorLegalizer::Run() {
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
+ LegalizeOp(SDValue(&*I, 0));
// Finally, it's possible the root changed. Get the new root.
SDValue OldRoot = DAG.getRoot();
@@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
assert(Result.getValue(1).use_empty() &&
"There are still live users of the old chain!");
return LegalizeOp(Lowered);
- } else {
- return TranslateLegalizeResults(Op, Lowered);
}
+ return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
Changed = true;
@@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
- switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
@@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
- } else if (Op.getOpcode() == ISD::MSCATTER)
+ } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
HasVectorValue = true;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
@@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ROTL:
case ISD::ROTR:
case ISD::BSWAP:
+ case ISD::BITREVERSE:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::MSCATTER:
QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
break;
+ case ISD::MSTORE:
+ QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
Result = Promote(Op);
Changed = true;
@@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
Operands[j] = Op.getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
@@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
@@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+ // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle it.
+ unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
+ return Op;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
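The _ZERO_UNDEF forms differ from plain CTLZ/CTTZ only when the input is zero, where their result is unspecified, so falling back to the fully defined opcode is always an acceptable refinement. A scalar sketch of that relationship (illustrative names, not DAG code):

    #include <cstdint>

    // Reference 8-bit count-leading-zeros; defined for all inputs, including 0.
    static unsigned ctlz8(uint8_t V) {
      unsigned N = 0;
      for (int Bit = 7; Bit >= 0 && !((V >> Bit) & 1); --Bit)
        ++N;
      return N;
    }

    // CTLZ_ZERO_UNDEF may return anything for V == 0, so reusing the fully
    // defined ctlz8 is always a valid implementation of it.
    static unsigned ctlz8_zero_undef(uint8_t V) { return ctlz8(V); }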
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 51cd661..d0187d3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::FPOW:
case ISD::FREM:
@@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
@@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->isInvariant(), N->getOriginalAlignment(),
N->getAAInfo());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
@@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
@@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
@@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+ const SDNodeFlags *Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
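Splitting FCOPYSIGN is purely element-wise: each half of the result is FCOPYSIGN of the matching halves of the two inputs. A scalar-array sketch of the same decomposition (assumed helper, not DAG code):

    #include <array>
    #include <cmath>

    // Apply copysign lane by lane over a 4-element "vector" split into two
    // halves; each half only reads the matching half of Mag and Sgn, mirroring
    // the Lo/Hi FCOPYSIGN nodes built above.
    static std::array<double, 4> copysign4(const std::array<double, 4> &Mag,
                                           const std::array<double, 4> &Sgn) {
      std::array<double, 4> R{};
      for (int i = 0; i < 2; ++i)          // "Lo" half
        R[i] = std::copysign(Mag[i], Sgn[i]);
      for (int i = 2; i < 4; ++i)          // "Hi" half
        R[i] = std::copysign(Mag[i], Sgn[i]);
      return R;
    }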
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(LD, 1), Ch);
}
@@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
+ SDValue Src0 = MLD->getSrc0();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0 = MLD->getSrc0();
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MLD, 1), Ch);
@@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
+ SDValue Index = MGT->getIndex();
unsigned Alignment = MGT->getOriginalAlignment();
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
}
@@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
@@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+
+ MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
+ MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
// if Alignment is equal to the vector size,
// take the half of it for the second part
@@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
+ // Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
- SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
- SDValue PtrLo, PtrHi;
- if (Ptr.getValueType().isVector()) // gather form vector of pointers
- std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL);
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
else
- PtrLo = PtrHi = Ptr;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo};
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
@@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi};
+ SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
@@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
//===----------------------------------------------------------------------===//
@@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::ADD:
case ISD::AND:
@@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
@@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
@@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
+ EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
@@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
-
+ const SDNodeFlags *Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
- return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
}
@@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
- return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
@@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
- return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
@@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Src0 = GetWidenedVector(N->getValue());
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
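
The widening above has to keep all gather operands at the same element count. A rough model with std::vector standing in for SDValues (names hypothetical; the real code leaves the padded index lanes undef rather than zero):

#include <cstdint>
#include <utility>
#include <vector>

struct WidenedGatherOps {
  std::vector<float>   PassThru; // widened pass-through values
  std::vector<bool>    Mask;     // padded with false so the extra lanes stay inactive
  std::vector<int32_t> Index;    // padded lanes are never consulted (their mask bit is off)
};

WidenedGatherOps widenGatherOperands(std::vector<float> PassThru,
                                     std::vector<bool> Mask,
                                     std::vector<int32_t> Index,
                                     size_t WideNumElts) {
  PassThru.resize(WideNumElts, 0.0f);
  Mask.resize(WideNumElts, false);
  Index.resize(WideNumElts, 0);
  return {std::move(PassThru), std::move(Mask), std::move(Index)};
}
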
@@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
@@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
false);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only data operand of mscatter");
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(DataOp);
+ EVT WideVT = WideVal.getValueType();
+ unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen index
+ SDValue Index = MSC->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
-SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
// Note that InOp might have been widened so it might already have
// the right width or it might need be narrowed.
EVT InVT = InOp.getValueType();
@@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
unsigned NumConcat = WidenNumElts / InNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
+ Ops[i] = FillVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
@@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
- Ops[Idx] = UndefVal;
+ Ops[Idx] = FillVal;
return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
}
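
The new FillWithZeroes flag only changes what the padding lanes hold. A minimal sketch, assuming a std::optional<float> lane where std::nullopt models an "undef" lane (a hypothetical helper, not the DAG form):

#include <optional>
#include <vector>

std::vector<std::optional<float>>
modifyToType(const std::vector<std::optional<float>> &In, size_t NewNumElts,
             bool FillWithZeroes) {
  std::vector<std::optional<float>> Out(In.begin(), In.end());
  if (Out.size() >= NewNumElts) {      // narrowing: keep the leading lanes
    Out.resize(NewNumElts);
    return Out;
  }
  std::optional<float> Fill =          // widening: choose the padding value
      FillWithZeroes ? std::optional<float>(0.0f) : std::nullopt;
  Out.resize(NewNumElts, Fill);
  return Out;
}
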
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 6303422..622e06f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TII = STI.getInstrInfo();
ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
- // do not let it procede.
+ // do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
unsigned NumRC = TRI->getNumRegClasses();
@@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
}
// Now see if there are no other dependencies
- // to instructions alredy in the packet.
+ // to instructions already in the packet.
for (unsigned i = 0, e = Packet.size(); i != e; ++i)
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
E = Packet[i]->Succs.end(); I != E; ++I) {
// Since we do not add pseudos to packets, might as well
- // ignor order deps.
+ // ignore order deps.
if (I->isCtrl())
continue;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 34e1a70..62e7733 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e9bd520..91024e6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -141,8 +141,8 @@ private:
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
unsigned NumLiveRegs;
- std::vector<SUnit*> LiveRegDefs;
- std::vector<SUnit*> LiveRegGens;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
// Collect interferences between physical register use/defs.
// Each interference is an SUnit and set of physical registers.
@@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() {
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
- LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
CallSeqEndForStart.clear();
assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
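
The switch from std::vector to a heap array relies on the trailing "()" value-initializing every slot, so the table still starts out all-null. A small self-contained check of that C++ detail, with SUnit stubbed out:

#include <cassert>
#include <memory>

struct SUnit {};

void demoZeroInitializedSlots(unsigned NumRegs) {
  std::unique_ptr<SUnit *[]> LiveRegDefs(new SUnit *[NumRegs + 1]());
  for (unsigned i = 0; i != NumRegs + 1; ++i)
    assert(LiveRegDefs[i] == nullptr); // same effect as the old resize(..., nullptr)
}
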
@@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- std::vector<SUnit*> &LiveRegDefs,
+ SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
const TargetRegisterInfo *TRI) {
@@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
- std::vector<SUnit*> &LiveRegDefs,
+ ArrayRef<SUnit*> LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs) {
// Look at all live registers. Skip Reg0 and the special CallResource.
@@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
- CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
RegAdded, LRegs, TRI);
}
@@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
i += NumVals;
@@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
}
if (const uint32_t *RegMask = getNodeRegMask(Node))
- CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+ CheckForLiveRegDefMasked(SU, RegMask,
+ makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
return !LRegs.empty();
@@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const uint16_t *ImpDefs
+ const MCPhysReg *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
if(!ImpDefs && !RegMask)
@@ -2737,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
return true;
if (ImpDefs)
- for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
// Return true if SU clobbers this physical register use and the
// definition of the register reaches from DepSU. IsReachable queries
// a topological forward sort of the DAG (following the successors).
@@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const uint16_t *SUImpDefs =
+ const MCPhysReg *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
if (!SUImpDefs && !SURegMask)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 159c28c..5cc8066 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -86,12 +86,6 @@ namespace llvm {
/// flagged together nodes with a single SUnit.
void BuildSchedGraph(AliasAnalysis *AA);
- /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
- /// CopyToReg and its only active data operands are CopyFromReg within a
- /// single block loop.
- ///
- void InitVRegCycleFlag(SUnit *SU);
-
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void InitNumRegDefsLeft(SUnit *SU);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 14f44cc..abbc48e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
-/// isScalarToVector - Return true if the specified node is a
-/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
-/// element is not an undef.
-bool ISD::isScalarToVector(const SDNode *N) {
- if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return true;
-
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
- if (N->getOperand(0).getOpcode() == ISD::UNDEF)
- return false;
- unsigned NumElems = N->getNumOperands();
- if (NumElems == 1)
- return false;
- for (unsigned i = 1; i < NumElems; ++i) {
- SDValue V = N->getOperand(i);
- if (V.getOpcode() != ISD::UNDEF)
- return false;
- }
- return true;
-}
-
/// allOperandsUndef - Return true if the node has at least one operand
/// and all operands of the specified node are ISD::UNDEF.
bool ISD::allOperandsUndef(const SDNode *N) {
@@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
ID.AddInteger(Op.getResNo());
}
}
+
/// Add logical or fast math flag values to FoldingSetNodeID value.
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
const SDNodeFlags *Flags) {
- if (!Flags || !isBinOpWithFlags(Opcode))
+ if (!isBinOpWithFlags(Opcode))
return;
- unsigned RawFlags = Flags->getRawFlags();
- // If no flags are set, do not alter the ID. We must match the ID of nodes
- // that were created without explicitly specifying flags. This also saves time
- // and allows a gradual increase in API usage of the optional optimization
- // flags.
- if (RawFlags != 0)
- ID.AddInteger(RawFlags);
+ unsigned RawFlags = 0;
+ if (Flags)
+ RawFlags = Flags->getRawFlags();
+ ID.AddInteger(RawFlags);
}
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
- if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
- AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
+ AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() {
SmallVector<SDNode*, 128> DeadNodes;
// Add all obviously-dead nodes to the DeadNodes worklist.
- for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
- if (I->use_empty())
- DeadNodes.push_back(I);
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
RemoveDeadNodes(DeadNodes);
@@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) {
void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
}
@@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
UpdateListeners(nullptr) {
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
while (!AllNodes.empty())
- DeallocateNode(AllNodes.begin());
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
}
BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
@@ -1023,7 +1003,7 @@ void SelectionDAG::clear() {
static_cast<SDNode*>(nullptr));
EntryNode.UseList = nullptr;
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
}
@@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
- TargetFlags);
+ SDNode *N =
+ new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
- ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- return getNode(Opcode, SDLoc(Op), ShTy, Op);
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(Align - 1, dl, VAList.getValueType()));
+
+ VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl,
+ Node->getOperand(0), Node->getOperand(2),
+ MachinePointerInfo(VS), false, false, false, 0);
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
}
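
expandVAArg is the generic pointer-bump lowering of va_arg. A plain-memory model of the same sequence, assuming Align is a power of two as the new assert requires (illustrative only, not the DAG form):

#include <cstdint>
#include <cstring>

void expandVAArgModel(char *&VAListSlot, void *Out, unsigned Size,
                      unsigned Align, unsigned MinStackArgAlign) {
  char *P = VAListSlot;                                  // load the va_list pointer
  if (Align > MinStackArgAlign) {                        // over-aligned argument
    uintptr_t Addr = reinterpret_cast<uintptr_t>(P);
    Addr = (Addr + Align - 1) & ~uintptr_t(Align - 1);   // round up to Align
    P = reinterpret_cast<char *>(Addr);
  }
  VAListSlot = P + Size;                                 // store the bumped pointer back
  std::memcpy(Out, P, Size);                             // load the actual argument
}

expandVACopy is the matching load/store pair: read the source va_list pointer and store it to the destination, returning the store's chain.
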
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
@@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
/// CreateStackTemporary - Create a stack temporary suitable for holding
/// either of the specified value types.
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
- VT2.getStoreSizeInBits())/8;
+ unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
@@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD)
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
}
break;
}
@@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ return std::min(Tmp, Tmp2);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
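
The new SELECT_CC case mirrors SELECT: the result is one of the two value operands, so only the sign bits common to both arms are guaranteed. A concrete 16-bit illustration (helper names hypothetical):

#include <algorithm>
#include <cstdint>

// Number of copies of the sign bit at the top of a 16-bit value, sign bit included.
unsigned numSignBits16(uint16_t V) {
  unsigned Sign = (V >> 15) & 1;
  unsigned N = 1;
  while (N < 16 && ((V >> (15 - N)) & 1) == Sign)
    ++N;
  return N;
}

unsigned signBitsOfSelect(int16_t TrueVal, int16_t FalseVal) {
  // Whichever arm is chosen at run time, only the smaller count is guaranteed.
  return std::min(numSignBits16(static_cast<uint16_t>(TrueVal)),
                  numSignBits16(static_cast<uint16_t>(FalseVal)));
}
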
@@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
const int rIndex = Items - 1 -
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- // If the sign portion ends in our element the substraction gives correct
+ // If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
@@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ APInt AZero, AOne;
+ APInt BZero, BOne;
+ computeKnownBits(A, AZero, AOne);
+ computeKnownBits(B, BZero, BOne);
+ return (AZero | BZero).isAllOnesValue();
+}
+
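
haveNoCommonBitsSet asks whether every bit position is known zero in at least one operand; when that holds, addition cannot carry and behaves exactly like an or. A toy check of that property with fully known 32-bit values (for which "known zero" is simply the complement):

#include <cassert>
#include <cstdint>

bool haveNoCommonBits(uint32_t A, uint32_t B) {
  // With everything known, the (AZero | BZero).isAllOnesValue() test
  // degenerates to (A & B) == 0.
  return (~A | ~B) == ~uint32_t(0);
}

void demoAddEqualsOr() {
  uint32_t A = 0xF0, B = 0x0F;
  assert(haveNoCommonBits(A, B));
  assert(A + B == (A | B)); // no carries, so ADD and OR agree
}
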
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
@@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
- else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
@@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
- EVT SVT = VT.getScalarType();
- EVT InVT = BV->getValueType(0);
- EVT InSVT = InVT.getScalarType();
-
- // Find legal integer scalar type for constant promotion and
- // ensure that its scalar size is at least as large as source.
- EVT LegalSVT = SVT;
- if (SVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT);
- if (LegalSVT.bitsLT(SVT)) break;
- }
-
- // Let the above scalar folding handle the folding of each element.
- SmallVector<SDValue, 8> Ops;
- for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- SDValue OpN = BV->getOperand(i);
- EVT OpVT = OpN.getValueType();
-
- // Build vector (integer) scalar operands may need implicit
- // truncation - do this before constant folding.
- if (OpVT.isInteger() && OpVT.bitsGT(InSVT))
- OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN);
-
- OpN = getNode(Opcode, DL, SVT, OpN);
-
- // Legalize the (integer) scalar constant if necessary.
- if (LegalSVT != SVT)
- OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN);
-
- if (OpN.getOpcode() != ISD::UNDEF &&
- OpN.getOpcode() != ISD::Constant &&
- OpN.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(OpN);
- }
- if (Ops.size() == VT.getVectorNumElements())
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
- break;
+ SDValue Ops = { Operand };
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
}
}
}
@@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid fpext node, dst < src!");
if (Operand.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid sext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::UNDEF)
@@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid zext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
@@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid anyext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
@@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
- "Invalid truncate node, src < dst!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsGT(VT) &&
+ "Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
@@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0));
+ Operand.getNode()->getOperand(0),
+ &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getNode()->getOperand(0);
break;
@@ -3182,6 +3198,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
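
The four new integer min/max folds differ only in the signedness of the comparison, which matters even for identical bit patterns. An 8-bit illustration:

#include <cassert>
#include <cstdint>

int8_t  smin(int8_t a, int8_t b)   { return a <= b ? a : b; }
uint8_t umin(uint8_t a, uint8_t b) { return a <= b ? a : b; }

void demoSignednessMatters() {
  // 0xFF is -1 when read signed but 255 when read unsigned, so SMIN and
  // UMIN of the constants {0xFF, 0x01} fold to different results.
  assert(smin(int8_t(-1), int8_t(1)) == int8_t(-1));     // SMIN keeps 0xFF
  assert(umin(uint8_t(0xFF), uint8_t(1)) == uint8_t(1)); // UMIN keeps 0x01
}
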
@@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
}
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
+ EVT VT,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ if (!VT.isVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorNumElements() == NumElts;
+ };
+
+ auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (Op.getOpcode() == ISD::UNDEF) ||
+ (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant());
+ };
+
+ // All operands must be vector types with the same number of elements as
+ // the result type and must be either UNDEF or a build vector of constant
+ // or UNDEF scalars.
+ if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
+ !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ return SDValue();
+
+ // If we are comparing vectors, then the result needs to be an i1 boolean
+ // that is then sign-extended back to the legal result type.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+
+ // Find legal integer scalar type for constant promotion and
+ // ensure that its scalar size is at least as large as source.
+ EVT LegalSVT = VT.getScalarType();
+ if (LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned i = 0; i != NumElts; i++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
+ if (!InBV) {
+ // We've checked that this is UNDEF or a constant of some kind.
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp = InBV->getOperand(i);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (ScalarResult.getOpcode() != ISD::UNDEF &&
+ ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ assert(ScalarResults.size() == NumElts &&
+ "Unexpected number of scalar results for BUILD_VECTOR");
+ return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults);
+}
+
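
FoldConstantVectorArithmetic, added above, folds one lane at a time and rebuilds a BUILD_VECTOR from the scalar results. A stripped-down model over integer vectors (the real code also handles UNDEF lanes, CONDCODE operands, and scalar type promotion):

#include <functional>
#include <vector>

std::vector<int> foldVectorBinOp(const std::vector<int> &LHS,
                                 const std::vector<int> &RHS,
                                 const std::function<int(int, int)> &ScalarOp) {
  // Operands must have the result's element count; each lane folds independently.
  std::vector<int> Result;
  Result.reserve(LHS.size());
  for (size_t i = 0; i < LHS.size() && i < RHS.size(); ++i)
    Result.push_back(ScalarOp(LHS[i], RHS[i]));
  return Result;
}

// Usage: foldVectorBinOp({1, 2, 3, 4}, {4, 3, 2, 1}, std::plus<int>()) yields
// {5, 5, 5, 5}, the constant BUILD_VECTOR that replaces the original node.
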
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags *Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ // Canonicalize constant to RHS if commutative.
+ if (isCommutativeBinOp(Opcode)) {
+ if (N1C && !N2C) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ } else if (N1CFP && !N2CFP) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+
switch (Opcode) {
default: break;
case ISD::TokenFactor:
@@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
- // 0+x --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
- if (CFP->getValueAPF().isZero())
- return N2;
// x+0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FMUL) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
- SDValue V = N2;
-
- // If the first operand isn't the constant, try the second
- if (!CFP) {
- CFP = dyn_cast<ConstantFPSDNode>(N2);
- V = N1;
- }
-
- if (CFP) {
- // 0*x --> 0
- if (CFP->isZero())
- return SDValue(CFP,0);
- // 1*x --> x
- if (CFP->isExactlyValue(1.0))
- return V;
- }
+ // x*0 --> 0
+ if (N2CFP && N2CFP->isZero())
+ return N2;
+ // x*1 --> x
+ if (N2CFP && N2CFP->isExactlyValue(1.0))
+ return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
@@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
- isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ N2C && "Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
@@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
- if (Op.getValueType() != VT.getScalarType()) break;
if (Op.getOpcode() == ISD::UNDEF) {
- Ops.push_back(Op);
+ Ops.push_back(getUNDEF(VT.getScalarType()));
continue;
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = C->getAPIntValue();
+ Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
Ops.push_back(SignExtendInReg(Val));
continue;
}
@@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
return N1.getOperand(N2C->getZExtValue());
// EXTRACT_ELEMENT of a constant int is also very common.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
- APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR: {
- SDValue Index = N2;
+ case ISD::EXTRACT_SUBVECTOR:
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
@@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
- if (isa<ConstantSDNode>(Index)) {
- assert((VT.getVectorNumElements() +
- cast<ConstantSDNode>(Index)->getZExtValue()
+ if (N2C) {
+ assert((VT.getVectorNumElements() + N2C->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
@@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
}
- }
// Perform trivial constant folding.
if (SDValue SV =
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
- // Canonicalize constant to RHS if commutative.
- if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
-
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N1CFP) {
- if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Canonicalize constant to RHS if commutative.
- std::swap(N1CFP, N2CFP);
- std::swap(N1, N2);
- } else if (N2CFP) {
+ if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
@@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
case ISD::FREM :
- s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ s = V1.mod(V2);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
@@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
break;
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
- SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
- if (Simp.getNode()) return Simp;
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return V;
break;
}
case ISD::SELECT:
- if (N1C) {
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
if (N1C->getZExtValue())
return N2; // select true, X, Y -> X
return N3; // select false, X, Y -> Y
@@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
return true;
}
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction()->optForMinSize();
+ return MF.getFunction()->optForSize();
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
@@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
@@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
true, DstPtrInfo, SrcPtrInfo);
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
@@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
@@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
- return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
// If this is (FI+Offset1)+Offset2, we can model it.
if (Ptr.getOpcode() != ISD::ADD ||
@@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
return MachinePointerInfo();
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- return MachinePointerInfo::getFixedStack(FI, Offset+
- cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
- return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.getOpcode() == ISD::UNDEF)
- return InferPointerInfo(Ptr);
+ return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
@@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr, Offset);
+ PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- MaskedGatherSDNode *N =
+ MaskedGatherSDNode *N =
new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
Ops, VTs, VT, MMO);
CSEMap.InsertNode(N, IP);
@@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0]);
- case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
@@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
"Update with wrong number of operands");
// If no operands changed just return the input node.
- if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
return N;
// See if the modified node already exists.
@@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = I++;
+ SDNode *N = &*I++;
checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
N->setNodeId(DAGSize++);
- allnodes_iterator Q = N;
+ allnodes_iterator Q(N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
@@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
- SDNode *N = I;
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
P->setNodeId(Degree);
}
}
- if (I == SortedPos) {
+ if (&Node == SortedPos) {
#ifndef NDEBUG
- SDNode *S = ++I;
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
dbgs() << "Overran sorted position:\n";
S->dumprFull(this); dbgs() << "\n";
dbgs() << "Checking if this is due to cycles\n";
@@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
// SDNode Class
//===----------------------------------------------------------------------===//
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
+const SDNodeFlags *SDNode::getFlags() const {
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ return &FlagsNode->Flags;
+ return nullptr;
+}
+
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
switch (N->getOpcode()) {
- default:
- Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
break;
+ }
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
@@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ APFloat APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
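
getConstantFPSplatPow2ToLog2Int reports the log2 of a splatted FP constant when it is an exact power of two. A plain-libm sketch of the same test for one value (the real code goes through APFloat/APSInt and additionally rejects values that do not fit the requested bit width):

#include <cmath>
#include <cstdint>

int32_t exactLog2OfFP(double V) {
  if (V <= 0.0)
    return -1;                       // zero and negative values are rejected
  int Exp = 0;
  double Mant = std::frexp(V, &Exp); // V == Mant * 2^Exp with Mant in [0.5, 1)
  if (Mant != 0.5)
    return -1;                       // only powers of two have Mant == 0.5
  if (Exp < 1)
    return -1;                       // fractional powers of two are rejected too
  return Exp - 1;                    // e.g. 8.0 -> 3, 1.0 -> 0
}
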
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2c3c0eb1..d2ea85ab 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -63,6 +64,7 @@
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision",
cl::init(0));
static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
// Limit the width of DAG chains. This is important in general to prevent
@@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
@@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartEVT);
- return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT, Val);
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
@@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
}
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT) {
- bool Smaller = ValueVT.bitsLE(PartEVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT.getScalarType(), Val);
- }
+ ValueVT.getVectorElementType() != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
@@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
@@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartEVT.bitsLE(ValueVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else{
// Vector -> scalar conversion.
assert(ValueVT.getVectorNumElements() == 1 &&
@@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- bool Smaller = ValueVT.bitsLE(PartVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
Parts[0] = Val;
@@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
+ for (EVT ValueVT : ValueVTs) {
unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
@@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
- if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ !isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
@@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
Val)) {
SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
- IsIndirect, Offset, dl, DbgSDNodeOrder);
+ false, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
llvm_unreachable("Can't get register for value!");
}
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+                         getControlRoot()));

+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BBs.
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I];
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of a funclet.
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
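(A rough sketch of what the walk above produces: for an invoke whose unwind edge lands on a block holding a catchswitch with two catchpad handlers, UnwindDests gets one entry per handler block, each marked as an EH funclet entry under the MSVC C++ and CoreCLR personalities, and the loop then continues into whatever the catchswitch itself unwinds to, scaling Prob by the corresponding edge probability; a landingpad or cleanuppad destination contributes a single entry and ends the walk.)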
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
@@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
- SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs;
@@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- if (!BPI)
- return 0;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
- return BPI->getEdgeWeight(SrcBB, DstBB);
+ if (!BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(
+ std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
}
-void SelectionDAGBuilder::
-addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight /* = 0 */) {
- if (!Weight)
- Weight = getEdgeWeight(Src, Dst);
- Src->addSuccessor(Dst, Weight);
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
}
-
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TWeight,
- uint32_t FWeight) {
+ BranchProbability TProb,
+ BranchProbability FProb) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
Condition = getFCmpCondCode(FC->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- } else {
- (void)Condition; // silence warning.
- llvm_unreachable("Unknown compare instruction");
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
- TBB, FBB, CurBB, TWeight, FWeight);
+ TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
return;
}
@@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, TBB, FBB, CurBB, TWeight, FWeight);
+ nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
-/// Scale down both weights to fit into uint32_t.
-static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
- uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
- NewTrue = NewTrue / Scale;
- NewFalse = NewFalse / Scale;
-}
-
/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc, uint32_t TWeight,
- uint32_t FWeight) {
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TWeight, FWeight);
+ TProb, FProb);
return;
}
// Create TmpBB after CurBB.
- MachineFunction::iterator BBI = CurBB;
+ MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
@@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
- // assumes that
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
- uint64_t NewTrueWeight = TWeight;
- uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = TWeight;
- NewFalseWeight = 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
- // assumes that
- // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
-
- uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
- uint64_t NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = 2 * (uint64_t)TWeight;
- NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
}
}
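(A quick numeric check of the 'or' split above, taking A = 0.6 and B = 0.4: BB1 gets probabilities {0.3, 0.7} and TmpBB gets the normalized pair {0.6/1.4, 0.8/1.4} = {3/7, 4/7}, so the overall probability of reaching TBB is 0.3 + 0.7 * 3/7 = 0.6, matching the original A. The 'and' case is symmetric, with the roles of A and B, and of the true/false edges, swapped.)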
@@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
- BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
- getEdgeWeight(BrMBB, Succ1MBB));
+ Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
GuardPtr, MachinePointerInfo(IRGuard, 0),
true, false, false, Align);
- SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
- StackSlotPtr,
- MachinePointerInfo::getFixedStack(FI),
- true, false, false, Align);
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+ false, false, Align);
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
@@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- nullptr, 0, false, getCurSDLoc(), false, false).second;
+ None, false, getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithWeight(SwitchBB, B.Default);
- addSuccessorWithWeight(SwitchBB, MBB);
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, RangeCmp,
@@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
@@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
- // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
- addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
- // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
- addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
+ // one, as they are relative probabilities (and thus work more like weights),
+ // so we need to normalize them to make their sum equal to one.
+ SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
@@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
- // Retrieve successors.
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
- MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
const Value *Callee(I.getCalledValue());
const Function *Fn = dyn_cast<Function>(Callee);
@@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, LandingPad);
+ visitPatchpoint(&I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
}
} else
- LowerCallTo(&I, getValue(Callee), false, LandingPad);
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
@@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
}
- // Update successor info
- addSuccessorWithWeight(InvokeMBB, Return);
- addSuccessorWithWeight(InvokeMBB, LandingPad);
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
@@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
- assert(FuncInfo.MBB->isLandingPad() &&
+ assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
@@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.getExceptionPointerRegister() == 0 &&
- TLI.getExceptionSelectorRegister() == 0)
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
@@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
// If this case has the same successor and is a neighbour, merge it into
// the previous cluster.
Clusters[DstIndex - 1].High = CaseVal;
- Clusters[DstIndex - 1].Weight += CC.Weight;
- assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!");
+ Clusters[DstIndex - 1].Prob += CC.Prob;
} else {
std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
sizeof(Clusters[SrcIndex]));
@@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
- addSuccessorWithWeight(IndirectBrMBB, Succ);
+ addSuccessorWithProb(IndirectBrMBB, Succ);
}
+ IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
@@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// Min/max matching is only viable if all output VTs are the same.
if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
- ISD::NodeType Opc = ISD::DELETED_NODE;
- switch (SPF) {
- case SPF_UMAX: Opc = ISD::UMAX; break;
- case SPF_UMIN: Opc = ISD::UMIN; break;
- case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- default: break;
- }
-
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
- while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
+ VT != TLI.getTypeToTransformTo(Ctx, VT))
VT = TLI.getTypeToTransformTo(Ctx, VT);
- if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
- // If the underlying comparison instruction is used by any other instruction,
- // the consumed instructions won't be destroyed, so it is not profitable
- // to convert to a min/max.
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY: {
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ Opc = ISD::FMINNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
+ Opc = ISD::FMINNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
+ ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ }
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ Opc = ISD::FMAXNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
+ Opc = ISD::FMAXNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
+ ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ }
+ break;
+ default: break;
+ }
+
+ if (Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
OpCode = Opc;
LHSVal = getValue(LHS);
@@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// throughout the function's lifetime.
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
+ isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(
- MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignemt, Mask)
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Value *PtrOperand = I.getArgOperand(1);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(I.getArgOperand(0));
@@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
setValue(&I, StoreNode);
}
-// Gather/scatter receive a vector of pointers.
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
-static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer, it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector, we
+// extract the splat value and use it as the uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
- assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (!Gep || Gep->getNumOperands() > 2)
+ SelectionDAG& DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getNumOperands() > 2)
return false;
- ShuffleVectorInst *ShuffleInst =
- dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
- if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
- cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
- Instruction::InsertElement)
+
+ const Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+ Value *IndexVal = GEP->getOperand(1);
- SelectionDAG& DAG = SDB->DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Check is the Ptr is inside current basic block
- // If not, look for the shuffle instruction
- if (SDB->findValue(Ptr))
- Base = SDB->getValue(Ptr);
- else if (SDB->findValue(ShuffleInst)) {
- SDValue ShuffleNode = SDB->getValue(ShuffleInst);
- SDLoc sdl = ShuffleNode;
- Base = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, sdl,
- ShuffleNode.getValueType().getScalarType(), ShuffleNode,
- DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDB->setValue(Ptr, Base);
- }
- else
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
- Value *IndexVal = Gep->getOperand(1);
- if (SDB->findValue(IndexVal)) {
- Index = SDB->getValue(IndexVal);
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ if (SDB->findValue(Sext->getOperand(0))) {
IndexVal = Sext->getOperand(0);
- if (SDB->findValue(IndexVal))
- Index = SDB->getValue(IndexVal);
+ Index = SDB->getValue(IndexVal);
}
- return true;
}
- return false;
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+ Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ }
+ return true;
}
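(Concretely, for the second example in the comment above, getUniformBase now returns the scalar %ptr as Base and the value of %ind as Index; the vector-of-pointers form only succeeds when %vptr is a splat, in which case getSplatValue recovers the scalar base, and a scalar index is widened into a BUILD_VECTOR splat so that Index is always a vector. Anything else still returns false and the gather/scatter is lowered without a uniform base.)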
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
- Value *Ptr = I.getArgOperand(1);
+ const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
@@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
- Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
MachineMemOperand::MOStore, VT.getStoreSize(),
@@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDValue InChain = DAG.getRoot();
if (AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
InChain = DAG.getEntryNode();
}
@@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
- Value *Ptr = I.getArgOperand(0);
+ const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
@@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ AA->pointsToConstantMemory(MemoryLocation(
+ BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
// IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// #define LOG2OFe 1.4426950f
// t0 = Op * LOG2OFe
+
+ // TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
@@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
}
}
+ // TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
- // If optimizing for size, don't insert too many multiplies. This
- // inserts up to 5 multiplies.
+ if (!F->optForSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
@@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
@@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
-// getTruncatedArgReg - Find underlying register used for an truncated
-// argument.
-static unsigned getTruncatedArgReg(const SDValue &N) {
- if (N.getOpcode() != ISD::TRUNCATE)
+// getUnderlyingArgReg - Find underlying register used for a truncated or
+// bitcasted argument.
+static unsigned getUnderlyingArgReg(const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg:
+ return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ return getUnderlyingArgReg(N.getOperand(0));
+ default:
return 0;
-
- const SDValue &Ext = N.getOperand(0);
- if (Ext.getOpcode() == ISD::AssertZext ||
- Ext.getOpcode() == ISD::AssertSext) {
- const SDValue &CFR = Ext.getOperand(0);
- if (CFR.getOpcode() == ISD::CopyFromReg)
- return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
}
- return 0;
}
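(The old getTruncatedArgReg only handled a chain rooted at a TRUNCATE; the replacement walks any mix of TRUNCATE, BITCAST, AssertZext and AssertSext nodes down to a CopyFromReg, so, for example, an argument value that was bitcast before being described by dbg.value can still be traced back to the virtual register it was copied in from.)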
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
@@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
- unsigned Reg;
- if (N.getOpcode() == ISD::CopyFromReg)
- Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- else
- Reg = getTruncatedArgReg(N);
+ unsigned Reg = getUnderlyingArgReg(N);
if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
@@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
- isa<Argument>(Address);
-
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-
- if (isParameter && !AI) {
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (FINode)
- // Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
- else {
- // Address is an argument, so try to emit its dbg value using
- // virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
- N);
- return nullptr;
- }
- } else if (AI)
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
+ FINode->getIndex(), 0, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
+ return nullptr;
+ } else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
- else {
- // Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
- DEBUG(Address->dump());
- return nullptr;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
@@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
- IsIndirect, N)) {
+ false, N)) {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- IsIndirect, Offset, dl, SDNodeOrder);
+ false, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::eh_sjlj_setup_dispatch: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return nullptr;
+ }
case Intrinsic::masked_gather:
visitMaskedGather(I);
@@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
} else {
+ // TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res.getValue(1));
return nullptr;
}
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return nullptr;
}
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
+ // the target.
+ if (PtrTy != ResTy)
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return nullptr;
+ }
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
@@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
@@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
- case Intrinsic::eh_actions:
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
visitStatepoint(I);
return nullptr;
}
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result: {
visitGCResult(I);
return nullptr;
@@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
-
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::eh_begincatch:
- case Intrinsic::eh_endcatch:
- llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+
+ case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
- unsigned Reg = TLI.getExceptionPointerRegister();
- assert(Reg && "cannot get exception code on this platform");
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
- assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
- unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
return nullptr;
}
@@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
@@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
- LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
@@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(Result.second);
}
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ if (MMI.hasEHFunclets()) {
+ assert(CLI.CS);
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ BeginLabel, EndLabel);
+ } else {
+ MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
}
return Result;
@@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
@@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CS)
.setTailCall(isTailCall);
- std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
@@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
@@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(Chain, getCurSDLoc(),
- OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
- unsigned NumArgs, SDValue Callee,
- Type *ReturnTy,
- MachineBasicBlock *LandingPad,
- bool IsPatchPoint) {
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+ ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
.setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
- return lowerInvokable(CLI, LandingPad);
+ return lowerInvokable(CLI, EHPadBB);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
@@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
- std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- LandingPad, true);
+ std::pair<SDValue, SDValue> Result = lowerCallOperands(
+ CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
- else if (j != 0)
+ else if (j != 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
@@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
PtrVT));
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
- false, false, 1);
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
if (FastISel)
return A->use_empty();
- const BasicBlock *Entry = A->getParent()->begin();
+ const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
- if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
@@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (F.getCallingConv() == CallingConv::X86_INTR) {
+ // The x86 interrupt calling convention passes the frame (the first parameter) by value on the stack.
+ if (Idx == 1)
+ Flags.setByVal();
+ }
if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
- else if (i > 0)
+ else if (i > 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
@@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If this argument is unused then remember its value. It is used to generate
// debugging information.
if (I->use_empty() && NumValues) {
- SDB->setUnusedArgValue(I, InVals[i]);
+ SDB->setUnusedArgValue(&*I, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(I, Res);
+ SDB->setValue(&*I, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
// If this argument is live outside of the entry block, insert a copy from
@@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[I] = Reg;
+ FuncInfo->ValueMap[&*I] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(I);
- SDB->CopyToExportRegsIfNeeded(I);
+ if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&*I);
+ SDB->CopyToExportRegsIfNeeded(&*I);
}
}
@@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB,
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI = ParentMBB;
+ MachineFunction::iterator BBI(ParentMBB);
SuccMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
return SuccMBB;
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
- MachineFunction::iterator I = MBB;
+ MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
- return I;
+ return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
@@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
CaseCluster &JTCluster) {
assert(First <= Last);
- uint32_t Weight = 0;
+ auto Prob = BranchProbability::getZero();
unsigned NumCmps = 0;
std::vector<MachineBasicBlock*> Table;
- DenseMap<MachineBasicBlock*, uint32_t> JTWeights;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Weight += Clusters[I].Weight;
- assert(Weight >= Clusters[I].Weight && "Weight overflow!");
+ Prob += Clusters[I].Prob;
APInt Low = Clusters[I].Low->getValue();
APInt High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
@@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
for (uint64_t J = 0; J < ClusterSize; ++J)
Table.push_back(Clusters[I].MBB);
- JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTWeights.size();
+ unsigned NumDests = JTProbs.size();
if (isSuitableForBitTests(NumDests, NumCmps,
Clusters[First].Low->getValue(),
Clusters[Last].High->getValue())) {
@@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
for (MachineBasicBlock *Succ : Table) {
if (Done.count(Succ))
continue;
- addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
Done.insert(Succ);
}
+ JumpTableMBB->normalizeSuccProbs();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
@@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
JTCases.emplace_back(std::move(JTH), std::move(JT));
JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
- JTCases.size() - 1, Weight);
+ JTCases.size() - 1, Prob);
return true;
}
@@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
.getSizeInBits();
assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
- if (Low.isNonNegative() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a
- // word without having to subtract minValue. In this case,
- // we can optimize away the subtraction.
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
LowBound = APInt::getNullValue(Low.getBitWidth());
CmpRange = High;
+ ContiguousRange = false;
} else {
LowBound = Low;
CmpRange = High - Low;
}
CaseBitsVector CBV;
- uint32_t TotalWeight = 0;
+ auto TotalProb = BranchProbability::getZero();
for (unsigned i = First; i <= Last; ++i) {
// Find the CaseBits for this destination.
unsigned j;
@@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
if (CBV[j].BB == Clusters[i].MBB)
break;
if (j == CBV.size())
- CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
CaseBits *CB = &CBV[j];
- // Update Mask, Bits and ExtraWeight.
+ // Update Mask, Bits and ExtraProb.
uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
CB->Bits += Hi - Lo + 1;
- CB->ExtraWeight += Clusters[i].Weight;
- TotalWeight += Clusters[i].Weight;
- assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
}
BitTestInfo BTI;
std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
- // Sort by weight first, number of bits second.
- if (a.ExtraWeight != b.ExtraWeight)
- return a.ExtraWeight > b.ExtraWeight;
+ // Sort by probability first, number of bits second.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
return a.Bits > b.Bits;
});
for (auto &CB : CBV) {
MachineBasicBlock *BitTestBB =
FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
- BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
}
BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false, nullptr,
- nullptr, std::move(BTI));
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
- BitTestCases.size() - 1, TotalWeight);
+ BitTestCases.size() - 1, TotalProb);
return true;
}
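The new ContiguousRange flag records whether the clusters' case values leave no gaps, so the range check emitted in the bit-test header is already enough to exclude the default destination. The mask arithmetic above packs each cluster into a word-sized bit mask relative to LowBound; a minimal standalone sketch of that computation (the values and the main() harness are invented for illustration, not part of the change):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A cluster covering [LowBound + 2, LowBound + 4] gives Lo = 2, Hi = 4.
      uint64_t Lo = 2, Hi = 4;
      uint64_t Mask = (-1ULL >> (63 - (Hi - Lo))) << Lo; // sets bits 2..4
      assert(Mask == 0x1Cu);                             // binary 11100
      return 0;
    }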
@@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
- NextMBB = BBI;
+ NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
@@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
ISD::SETEQ);
// Update successor info.
- // Both Small and Big will jump to Small.BB, so we sum up the weights.
- addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
- addSuccessorWithWeight(
- SwitchMBB, DefaultMBB,
- // The default destination is the first successor in IR.
- BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
- : 0);
+ // Both Small and Big will jump to Small.BB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
@@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
if (TM.getOptLevel() != CodeGenOpt::None) {
- // Order cases by weight so the most likely case will be checked first.
+ // Order cases by probability so the most likely case will be checked first.
std::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
- return a.Weight > b.Weight;
+ return a.Prob > b.Prob;
});
// Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of weights.
+ // without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
- if (I->Weight > W.LastCluster->Weight)
+ if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
@@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- // Compute total weight.
- uint32_t UnhandledWeights = 0;
- for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
- UnhandledWeights += I->Weight;
- assert(UnhandledWeights >= I->Weight && "Weight overflow!");
- }
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
@@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
+ UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
@@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
- addSuccessorWithWeight(CurMBB, Fallthrough);
- addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
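When the default destination is itself reachable through the jump table, the code above splits the default probability evenly between the table and the fallthrough path, then renormalizes. A numeric sketch of that adjustment (the fractions and the helper function are invented for illustration):

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    void jumpTableProbSketch() {
      BranchProbability JumpProb(6, 10);        // I->Prob: weight of the jump-table cluster
      BranchProbability FallthroughProb(4, 10); // UnhandledProbs after subtracting I->Prob
      BranchProbability DefaultProb(2, 10);     // probability of taking the default edge

      JumpProb += DefaultProb / 2;        // CurMBB -> JumpMBB rises to ~7/10
      FallthroughProb -= DefaultProb / 2; // CurMBB -> Fallthrough drops to ~3/10
      // Inside the table, the JumpMBB -> DefaultMBB edge is set to DefaultProb / 2
      // and JumpMBB's successor probabilities are renormalized afterwards.
    }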
@@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
- // If we're in the right place, emit the bit test header header right now.
- if (CurMBB ==SwitchMBB) {
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough to two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
@@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
RHS = I->High;
}
- // The false weight is the sum of all unhandled cases.
- UnhandledWeights -= I->Weight;
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
- UnhandledWeights);
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
+ UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
- if (X.Weight != CC.Weight)
- return X.Weight > CC.Weight;
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
@@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
- // Balance the tree based on branch weights to create a near-optimal (in terms
- // of search time given key frequency) binary search tree. See e.g. Kurt
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
- uint32_t LeftWeight = LastLeft->Weight;
- uint32_t RightWeight = FirstRight->Weight;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
- // find a partitioning of the clusters which balances the weight on both
- // sides. If LeftWeight and RightWeight are equal, alternate which side is
- // taken to ensure 0-weight nodes are distributed evenly.
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
- if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
- LeftWeight += (++LastLeft)->Weight;
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
else
- RightWeight += (--FirstRight)->Weight;
+ RightProb += (--FirstRight)->Prob;
I++;
}
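The loop above moves LastLeft and FirstRight toward each other until the probability mass on both sides is as even as possible, seeding each side with half of the default probability. A standalone simulation with plain doubles instead of BranchProbability (the cluster probabilities are invented for illustration):

    #include <cstdio>

    int main() {
      double Prob[] = {0.1, 0.4, 0.2, 0.3}; // per-cluster probabilities, left to right
      double DefaultProb = 0.2;             // probability of the default edge
      int LastLeft = 0, FirstRight = 3, I = 0;
      double LeftProb = Prob[LastLeft] + DefaultProb / 2;
      double RightProb = Prob[FirstRight] + DefaultProb / 2;
      while (LastLeft + 1 < FirstRight) {
        if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
          LeftProb += Prob[++LastLeft];
        else
          RightProb += Prob[--FirstRight];
        I++;
      }
      // With these numbers LastLeft == 1 and FirstRight == 2, both sides carry
      // roughly 0.6, and the pivot becomes cluster 2's low case value.
      std::printf("split between clusters %d and %d (left %.1f, right %.1f)\n",
                  LastLeft, FirstRight, LeftProb, RightProb);
      return 0;
    }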
@@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
@@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
- WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
@@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
- WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
- LeftWeight, RightWeight);
+ LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
- uint32_t Weight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
- Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
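When no BranchProbabilityInfo is available, every case edge falls back to a uniform probability of 1/(NumCases + 1), with the remaining share implicitly covering the default destination. A one-line illustration (the function name is invented):

    #include "llvm/Support/BranchProbability.h"

    void uniformCaseProbSketch() {
      unsigned NumCases = 3;                         // a switch with three cases
      llvm::BranchProbability Prob(1, NumCases + 1); // each case edge gets 1/4
      (void)Prob;                                    // the last 1/4 is left for the default
    }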
@@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 7006754..49a3872 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -17,6 +17,7 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -30,7 +31,6 @@
namespace llvm {
class AddrSpaceCastInst;
-class AliasAnalysis;
class AllocaInst;
class BasicBlock;
class BitCastInst;
@@ -154,39 +154,39 @@ private:
unsigned JTCasesIndex;
unsigned BTCasesIndex;
};
- uint32_t Weight;
+ BranchProbability Prob;
static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
- MachineBasicBlock *MBB, uint32_t Weight) {
+ MachineBasicBlock *MBB, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_Range;
C.Low = Low;
C.High = High;
C.MBB = MBB;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster jumpTable(const ConstantInt *Low,
const ConstantInt *High, unsigned JTCasesIndex,
- uint32_t Weight) {
+ BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_JumpTable;
C.Low = Low;
C.High = High;
C.JTCasesIndex = JTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
- unsigned BTCasesIndex, uint32_t Weight) {
+ unsigned BTCasesIndex, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_BitTests;
C.Low = Low;
C.High = High;
C.BTCasesIndex = BTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
};
@@ -198,13 +198,13 @@ private:
uint64_t Mask;
MachineBasicBlock* BB;
unsigned Bits;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
- uint32_t Weight):
- Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
- CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
};
typedef std::vector<CaseBits> CaseBitsVector;
@@ -217,13 +217,13 @@ private:
/// blocks needed by multi-case switch statements.
struct CaseBlock {
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle,
- MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
- MachineBasicBlock *me,
- uint32_t trueweight = 0, uint32_t falseweight = 0)
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
- TrueWeight(trueweight), FalseWeight(falseweight) { }
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
+ FalseProb(falseprob) {}
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
@@ -239,8 +239,8 @@ private:
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
- // TrueWeight/FalseWeight - branch weights.
- uint32_t TrueWeight, FalseWeight;
+ // TrueProb/FalseProb - branch probabilities.
+ BranchProbability TrueProb, FalseProb;
};
struct JumpTable {
@@ -272,32 +272,35 @@ private:
struct BitTestCase {
BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- uint32_t Weight):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
};
typedef SmallVector<BitTestCase, 3> BitTestInfo;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, MVT RgVT, bool E,
- MachineBasicBlock* P, MachineBasicBlock* D,
- BitTestInfo C):
- First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- Parent(P), Default(D), Cases(std::move(C)) { }
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
+ Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
MVT RegVT;
bool Emitted;
+ bool ContiguousRange;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
BitTestInfo Cases;
+ BranchProbability Prob;
+ BranchProbability DefaultProb;
};
/// Minimum jump table density, in percent.
@@ -339,6 +342,7 @@ private:
CaseClusterIt LastCluster;
const ConstantInt *GE;
const ConstantInt *LT;
+ BranchProbability DefaultProb;
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
@@ -515,6 +519,7 @@ private:
void resetPerFunctionState() {
FailureMBB = nullptr;
Guard = nullptr;
+ GuardReg = 0;
}
MachineBasicBlock *getParentMBB() { return ParentMBB; }
@@ -592,10 +597,6 @@ public:
///
FunctionLoweringInfo &FuncInfo;
- /// OptLevel - What optimization level we're generating code for.
- ///
- CodeGenOpt::Level OptLevel;
-
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
@@ -613,7 +614,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ DAG(dag), FuncInfo(funcinfo),
HasTailCall(false) {
}
@@ -692,19 +693,20 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB, unsigned Opc,
- uint32_t TW, uint32_t FW);
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TW,
+ BranchProbability FW);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TW, uint32_t FW);
+ BranchProbability TW, BranchProbability FW);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
std::pair<SDValue, SDValue> lowerCallOperands(
ImmutableCallSite CS,
@@ -712,7 +714,7 @@ public:
unsigned NumArgs,
SDValue Callee,
Type *ReturnTy,
- MachineBasicBlock *LandingPad = nullptr,
+ const BasicBlock *EHPadBB = nullptr,
bool IsPatchPoint = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
@@ -722,11 +724,11 @@ public:
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
void LowerStatepoint(ImmutableStatepoint Statepoint,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
private:
- std::pair<SDValue, SDValue> lowerInvokable(
- TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad);
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -734,11 +736,18 @@ private:
void visitSwitch(const SwitchInst &I);
void visitIndirectBr(const IndirectBrInst &I);
void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
- uint32_t getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const;
- void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight = 0);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
@@ -748,7 +757,7 @@ public:
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
@@ -842,7 +851,7 @@ private:
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
void visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5b9b182..a1c6c4c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -30,6 +31,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
+
std::string SDNode::getOperationName(const SelectionDAG *G) const {
switch (getOpcode()) {
default:
@@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
case ISD::ConstantPool: return "ConstantPool";
case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
@@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::FMINNAN: return "fminnan";
+ case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
@@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
+ case ISD::SETCCE: return "setcce";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CALLSEQ_START: return "callseq_start";
case ISD::CALLSEQ_END: return "callseq_end";
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
@@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
case ISD::CTTZ: return "cttz";
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
+
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
@@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETO: return "seto";
case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
+ case ISD::SETUEQ: return "setueq";
case ISD::SETUGT: return "setugt";
case ISD::SETUGE: return "setuge";
case ISD::SETULT: return "setult";
@@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
}
}
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
+
void SDNode::dump() const { dump(nullptr); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
@@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const {
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (const void*)this << ": ";
-
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ",";
if (getValueType(i) == MVT::Other)
@@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
else
OS << getValueType(i).getEVTString();
}
- OS << " = " << getOperationName(G);
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
@@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
<< ']';
}
- if (unsigned Order = getIROrder())
- OS << " [ORD=" << Order << ']';
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
- if (!G)
- return;
+ if (!G)
+ return;
- DILocation *L = getDebugLoc();
- if (!L)
- return;
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
+ }
+}
- if (auto *Scope = L->getScope())
- OS << Scope->getFilename();
- else
- OS << "<unknown>";
- OS << ':' << L->getLine();
- if (unsigned C = L->getColumn())
- OS << ':' << C;
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node) {
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (const SDValue &Op : N->op_values())
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode()))
+ continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)Op.getNode() << ": <multiple use>";
+ }
- dbgs() << '\n';
dbgs().indent(indent);
N->dump(G);
}
void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
+ const SDNode *N = &*I;
+ if (!N->hasOneUse() && N != getRoot().getNode() &&
+ (!shouldPrintInline(*N) || N->use_empty()))
DumpNodes(N, 2, this);
}
@@ -573,10 +606,30 @@ void SelectionDAG::dump() const {
}
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
print_types(OS, G);
+ OS << " = " << getOperationName(G);
print_details(OS, G);
}
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ } else if (shouldPrintInline(*Value.getNode())) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ } else {
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+ }
+}
+
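With PrintNodeId, shouldPrintInline and printOperand in place, operand-less nodes such as constants are folded into their user's line and other operands are referred to by their tN ids. Based on the printing code above, a dump line should look roughly like the following (an approximation for illustration, not captured from an actual run):

    t0: ch = EntryToken
    t5: i32 = add t3, Constant:i32<7>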
typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
// Having printed this SDNode, walk the children:
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
if (i) OS << ",";
OS << " ";
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (const void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
}
OS << "\n";
@@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const {
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
+ printr(OS, G);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
+ printOperand(OS, G, getOperand(i));
}
- print_details(OS, G);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 97ece8b..853a21a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
@@ -263,13 +264,17 @@ namespace llvm {
return;
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
- SavedFastISel = IS.TM.Options.EnableFastISel;
- if (NewOptLevel == CodeGenOpt::None)
- IS.TM.setFastISel(true);
DEBUG(dbgs() << "\nChanging optimization level for Function "
<< IS.MF->getFunction()->getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
<< " ; After: -O" << NewOptLevel << "\n");
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ DEBUG(dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
}
~OptLevelChanger() {
@@ -293,6 +298,11 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
if (OptLevel == CodeGenOpt::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
@@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
- initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
- initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeTargetLibraryInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
@@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
+static void SplitCriticalSideEffectEdges(Function &Fn) {
// Loop for blocks with phi nodes.
- for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ for (BasicBlock &BB : Fn) {
+ PHINode *PN = dyn_cast<PHINode>(BB.begin());
if (!PN) continue;
ReprocessBlock:
@@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// are potentially trapping constant expressions. Constant expressions are
// the only potentially trapping value that can occur as the argument to a
// PHI.
- for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
if (!CE || !CE->canTrap()) continue;
@@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Okay, we have to split this edge.
SplitCriticalEdge(
- Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
- CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
+ Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
+ CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
- FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
@@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF->setHasInlineAsm(false);
+ FuncInfo->SplitCSR = false;
+ SmallVector<MachineBasicBlock*, 4> Returns;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const TerminatorInst *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term))
+ continue;
+ if (isa<ReturnInst>(Term)) {
+ Returns.push_back(FuncInfo->MBBMap[&BB]);
+ continue;
+ }
+
+ // Bail out if the exit block is not Return nor Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
- MachineBasicBlock *EntryMBB = MF->begin();
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR)
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
@@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() {
// graph) and preceding back toward the beginning (the entry
// node).
while (ISelPosition != CurDAG->allnodes_begin()) {
- SDNode *Node = --ISelPosition;
+ SDNode *Node = &*--ISelPosition;
// Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
// but there are currently some corner cases that it misses. Also, this
// makes it theoretically possible to disable the DAGCombiner.
@@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() {
PostprocessISelDAG();
}
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
-
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ return true;
+ }
+
+ if (!LLVMBB->isLandingPad())
+ return true;
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // If this is an MSVC-style personality function, we need to split the landing
- // pad into several BBs.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
- MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent()
- ->getParent()
- ->getPersonalityFn()
- ->stripPointerCasts()));
- EHPersonality Personality = MF->getMMI().getPersonalityType();
-
- if (isMSVCEHPersonality(Personality)) {
- SmallVector<MachineBasicBlock *, 4> ClauseBBs;
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt());
- // Get all invoke BBs that unwind to this landingpad.
- SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
- MBB->pred_end());
- if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) {
- // If this is a call to llvm.eh.actions followed by indirectbr, then we've
- // run WinEHPrepare, and we should remove this block from the machine CFG.
- // Mark the targets of the indirectbr as landingpads instead.
- for (const BasicBlock *LLVMSucc : successors(LLVMBB)) {
- MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc];
- // Add the edge from the invoke to the clause.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->addSuccessor(ClauseBB);
-
- // Mark the clause as a landing pad or MI passes will delete it.
- ClauseBB->setIsLandingPad();
- }
- }
-
- // Remove the edge from the invoke to the lpad.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->removeSuccessor(MBB);
-
- // Don't select instructions for the landingpad.
- return false;
- }
-
// Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister())
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
// Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister())
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
return true;
@@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
- !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
@@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
- if (LLVMBB->isLandingPad())
+ if (LLVMBB->isEHPad())
if (!PrepareEHLandingPad())
continue;
@@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
- const Instruction *Inst = std::prev(BI);
+ const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
@@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// then see if there is a load right before the selected instructions.
// Try to fold the load if so.
const Instruction *BeforeInst = Inst;
- while (BeforeInst != Begin) {
- BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst));
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
break;
}
@@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// For the purpose of debugging, just abort.
report_fatal_error("FastISel didn't select the entire block");
- if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst->getType());
@@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
- SelectBasicBlock(Inst, BI, HadTailCall);
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
// We also need to delete any previously emitted instructions.
@@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
}
- uint32_t UnhandledWeight = 0;
- for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
- UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
-
+ BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob;
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
- UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
+ UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb;
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Cases[j+1].ThisBB,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range.
+ MachineBasicBlock *NextMBB;
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB;
+ else if (j + 1 != ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB;
else
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Default,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+ NextMBB = SDB->BitTestCases[i].Default;
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ NextMBB,
+ UnhandledProb,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
+
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ break;
}
// Update PHI Nodes
@@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() {
/// one preferred by the target.
///
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
- RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
-
- if (!Ctor) {
- Ctor = ISHeuristic;
- RegisterScheduler::setDefault(Ctor);
- }
-
- return Ctor(this, OptLevel);
+ return ISHeuristic(this, OptLevel);
}
//===----------------------------------------------------------------------===//
@@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
@@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
/// CheckSame - Implements OP_CheckSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
@@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckChildSame - Implements OP_CheckChildXSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
@@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL,
unsigned ChildNo) {
@@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && C->getSExtValue() == Val;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
@@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 4df5ede..2764688 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -80,9 +80,16 @@ namespace llvm {
return true;
}
- static bool hasNodeAddressLabel(const SDNode *Node,
- const SelectionDAG *Graph) {
- return true;
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
}
/// If you want to override the dot attributes printed for a particular
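The new getNodeIdentifierLabel above assembles its node label with raw_string_ostream. A small, self-contained sketch of that idiom, with function and value names that are illustrative rather than taken from the patch:

  #include "llvm/Support/raw_ostream.h"
  #include <string>

  static std::string makeNodeLabel(unsigned PersistentId) {
    std::string Buf;
    llvm::raw_string_ostream OS(Buf);
    OS << 't' << PersistentId; // e.g. "t7", matching SelectionDAG dump names
    return OS.str();           // str() flushes the stream into Buf
  }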
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 34688df..050ec21 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ MFI->markAsStatepointSpillSlotObjectIndex(FI);
+
Builder.FuncInfo.StatepointStackSlots.push_back(FI);
AllocatedStackSlots.push_back(true);
return SpillSlot;
@@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
return Builder.DAG.getFrameIndex(FI, ValueType);
}
// Note: We deliberately choose to advance this only on the failing path.
- // Doing so on the suceeding path involes a bit of complexity that caused a
- // minor bug previously. Unless performance shows this matters, please
+ // Doing so on the succeeding path involves a bit of complexity that caused
+ // a minor bug previously. Unless performance shows this matters, please
// keep this code as simple as possible.
NextSlotToAllocate++;
}
@@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
static Optional<int> findPreviousSpillSlot(const Value *Val,
SelectionDAGBuilder &Builder,
int LookUpDepth) {
- // Can not look any futher - give up now
+ // Can not look any further - give up now
if (LookUpDepth <= 0)
return Optional<int>();
@@ -196,7 +200,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
-/// This helps to avoid series of loads and stores that only serve to resuffle
+/// This helps to avoid series of loads and stores that only serve to reshuffle
/// values on the stack between calls.
static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
@@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
SmallVectorImpl<const Value *> &Relocs,
SelectionDAGBuilder &Builder) {
- // This is horribly ineffecient, but I don't care right now
+ // This is horribly inefficient, but I don't care right now
SmallSet<SDValue, 64> Seen;
SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
@@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *
-lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
+lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
SelectionDAGBuilder &Builder,
SmallVectorImpl<SDValue> &PendingExports) {
ImmutableCallSite CS(ISP.getCallSite());
- SDValue ActualCallee = Builder.getValue(ISP.getCalledValue());
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint, then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = Builder.DAG.getTargetLoweringInfo();
+ const auto &DL = Builder.DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(),
+ TLI.getPointerTy(DL, AS));
+ } else
+ ActualCallee = Builder.getValue(ISP.getCalledValue());
assert(CS.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
@@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands(
ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad,
+ ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB,
false /* IsPatchPoint */);
SDNode *CallEnd = CallEndVal.getNode();
@@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
// ch, glue = callseq_end ch, glue
// get_return_value ch, glue
//
- // get_return_value can either be a CopyFromReg to grab the return value from
- // %RAX, or it can be a LOAD to load a value returned by reference via a stack
- // slot.
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
- if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg ||
- CallEnd->getOpcode() == ISD::LOAD))
- CallEnd = CallEnd->getOperand(0).getNode();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
- if (HasDef) {
- if (CS.isInvoke()) {
- // Result value will be used in different basic block for invokes
- // so we need to export it now. But statepoint call has a different type
- // than the actuall call. It means that standart exporting mechanism will
- // create register of the wrong type. So instead we need to create
- // register with correct type and save value into it manually.
+ // Export the result value if needed
+ const Instruction *GCResult = ISP.getGCResult();
+ if (HasDef && GCResult) {
+ if (GCResult->getParent() != CS.getParent()) {
+ // Result value will be used in a different basic block so we need to
+ // export it now.
+ // The default exporting mechanism will not work here because the statepoint
+ // call has a different type than the actual call. It means that by default
+ // llvm will create an export register of the wrong type (always i32 in our
+ // case). So instead we need to create an export register with the correct
+ // type manually.
// TODO: To eliminate this problem we can remove gc.result intrinsics
- // completelly and make statepoint call to return a tuple.
+ // completely and make statepoint call to return a tuple.
unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
RegsForValue RFV(
*Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
@@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
PendingExports.push_back(Chain);
Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
} else {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
+ // Result value will be used in the same basic block. Don't export it or
+ // perform any explicit register copies.
+ // We'll replace the actual call node shortly. gc_result will grab
// this value.
Builder.setValue(CS.getInstruction(), ReturnValue);
}
@@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// chaining stores one after another, this may allow
// a bit more optimal scheduling for them
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- MachinePointerInfo::getFixedStack(Index),
+ MachinePointerInfo::getFixedStack(
+ Builder.DAG.getMachineFunction(), Index),
false, false, 0);
Builder.StatepointLowering.setLocation(Incoming, Loc);
@@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// to the GCStrategy from there (yet).
GCStrategy &S = Builder.GFI->getStrategy();
for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
}
for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() && "non gc managed pointer relocated");
}
@@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record value as visited, but not spilled. This is case for allocas
- // and constants. For this values we can avoid emiting spill load while
+ // and constants. For these values we can avoid emitting a spill load while
// visiting corresponding gc_relocate.
// Actually we do not need to record them in this map at all.
- // We do this only to check that we are not relocating any unvisited value.
+ // We do this only to check that we are not relocating any unvisited
+ // value.
SpillMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks does not work for gc relocates.
// Note that it would be incorrect to teach llvm that all relocates are
- // uses of the corresponging values so that it would automatically
+ // uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values does not use original
// value.
- if (StatepointSite.getCallSite().isInvoke())
+ if (RelocateOpers.getUnderlyingCallSite().getParent() !=
+ StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
}
void SelectionDAGBuilder::LowerStatepoint(
- ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
+ ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint(
ImmutableCallSite CS(ISP.getCallSite());
#ifndef NDEBUG
- // Consistency check. Don't do this for invokes. It would be too
- // expensive to preserve this information across different basic blocks
- if (!CS.isInvoke()) {
- for (const User *U : CS->users()) {
- const CallInst *Call = cast<CallInst>(U);
- if (isGCRelocate(Call))
- StatepointLowering.scheduleRelocCall(*Call);
- }
+ // Consistency check. Check only relocates in the same basic block as their
+ // statepoint.
+ for (const User *U : CS->users()) {
+ const CallInst *Call = cast<CallInst>(U);
+ if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
+ StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Get call node, we will replace it later with statepoint
SDNode *CallNode =
- lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);
+ lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Replace original call
DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
- // Remove originall call node
+ // Remove original call node
DAG.DeleteNode(CallNode);
// DON'T set the root - under the assumption that it's already set past the
@@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
Instruction *I = cast<Instruction>(CI.getArgOperand(0));
assert(isStatepoint(I) && "first argument must be a statepoint token");
- if (isa<InvokeInst>(I)) {
- // For invokes we should have stored call result in a virtual register.
+ if (I->getParent() != CI.getParent()) {
+ // Statepoint is in a different basic block so we should have stored call
+ // result in a virtual register.
// We can not use default getValue() functionality to copy value from this
// register because statepoint and actuall call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
@@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
#ifndef NDEBUG
// Consistency check
- // We skip this check for invoke statepoints. It would be too expensive to
- // preserve validation info through different basic blocks.
- if (!RelocateOpers.isTiedToInvoke()) {
+ // We skip this check for relocates not in the same basic block as their
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
StatepointLowering.relocCallVisited(CI);
}
#endif
@@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// Be conservative: flush all pending loads
// TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opprtunities
+ // it may allow more scheduling opportunities.
SDValue Chain = getRoot();
SDValue SpillLoad =
- DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(*DerivedPtrLocation),
- false, false, false, 0);
+ DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ *DerivedPtrLocation),
+ false, false, false, 0);
// Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
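The StatepointLowering hunks above switch MachinePointerInfo::getFixedStack to the overload that takes the MachineFunction. A hedged sketch of the resulting call shape, mirroring the reload emitted above but with illustrative names and a hypothetical helper:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/SelectionDAG.h"

  static llvm::SDValue reloadSpillSlot(llvm::SelectionDAG &DAG,
                                       llvm::SDValue Chain,
                                       llvm::SDValue SpillSlot,
                                       int FrameIndex, llvm::SDLoc DL) {
    // The frame index is resolved against this function's MachineFrameInfo.
    return DAG.getLoad(SpillSlot.getValueType(), DL, Chain, SpillSlot,
                       llvm::MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIndex),
                       /*isVolatile=*/false, /*isNonTemporal=*/false,
                       /*isInvariant=*/false, /*Alignment=*/0);
  }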
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fbf6512..c64d882 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG,
RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
+ ArrayRef<SDValue> Ops,
bool isSigned, SDLoc dl,
bool doesNotReturn,
bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
+ Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
+ for (SDValue Op : Ops) {
+ Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
+
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
@@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
return LowerCallTo(CLI);
}
-
-/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
@@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
@@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
(VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
break;
- default:
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
switch (CCCode) {
- case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
- // Fallthrough
- case ISD::SETUGT:
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
- break;
- case ISD::SETUGE:
- LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
- (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
- break;
case ISD::SETULT:
- LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
break;
case ISD::SETULE:
- LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
break;
- case ISD::SETUEQ:
- LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
- (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
@@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target specific return value for comparions lib calls.
EVT RetVT = getCmpLibcallReturnType();
- SDValue Ops[2] = { NewLHS, NewRHS };
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
+
CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
@@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
}
}
-/// getJumpTableEncoding - Return the entry encoding for a jump table in the
-/// current function. The returned value is a member of the
-/// MachineJumpTableInfo::JTEntryKind enum.
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
@@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
return Table;
}
-/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
-/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
-/// MCExpr.
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
@@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
-/// specified instruction is a constant integer. If so, check to see if there
-/// are any bits set in the constant that are not demanded. If so, shrink the
-/// constant and return true.
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
SDLoc dl(Op);
@@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
return false;
}
-/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
-/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
-/// cast, but it could be generalized for targets with other types of
-/// implicit widening casts.
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
unsigned BitWidth,
@@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
return false;
}
-/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
-/// DemandedMask bits of the result of Op are ever used downstream. If we can
-/// use this information to simplify Op, create a new simplified DAG node and
-/// return true, returning the original and new nodes in Old and New. Otherwise,
-/// analyze the expression and return a mask of KnownOne and KnownZero bits for
-/// the expression (used to simplify the caller). The KnownZero/One bits may
-/// only be accurate for those bits in the DemandedMask.
+/// Look at Op. At this point, we know that only the DemandedMask bits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of KnownOne and KnownZero bits for the expression (used to
+/// simplify the caller). The KnownZero/One bits may only be accurate for those
+/// bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
const APInt &DemandedMask,
APInt &KnownZero,
@@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
@@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
-/// computeKnownBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
+/// Determine which of the bits specified in Mask are known to be either zero or
+/// one and return them in the KnownZero/KnownOne bitsets.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
@@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
-/// ComputeNumSignBitsForTargetNode - This method can be implemented by
-/// targets that want to expose additional information about sign bits to the
-/// DAG Combiner.
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const SelectionDAG &,
unsigned Depth) const {
@@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
-/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
-/// one bit set. This differs from computeKnownBits in that it doesn't need to
-/// determine which bit is set.
-///
+/// Test if the given value is known to have exactly one bit set. This differs
+/// from computeKnownBits in that it doesn't need to determine which bit is set.
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// A left-shift of a constant one will have exactly one bit set, because
// shifting the bit off the end is undefined.
@@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
return CN->isNullValue();
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
-/// and cc. If it is unable to simplify it, return a null SDValue.
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
@@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
@@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
PreExt = N0->getOperand(0);
@@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
Signed = true;
- } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
// ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
@@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize()
? getPointerTy(DL)
: getShiftAmountTy(N0.getValueType(), DL);
@@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
@@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Constant fold or commute setcc.
SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
if (O.getNode()) return O;
- } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
// If the RHS of an FP comparison is a constant, simplify it away in
// some cases.
if (CFP->getValueAPF().isNaN()) {
@@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// to be careful about increasing register pressure needlessly.
bool LegalRHSImm = false;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
return DAG.getSetCC(dl, VT, N0.getOperand(0),
@@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Turn (C1-X) == C2 --> X == C1-C2
- if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
return
DAG.getSetCC(dl, VT, N0.getOperand(1),
@@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
-/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
-/// node is a GlobalAddress + offset.
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
- if (isa<GlobalAddressSDNode>(N)) {
- GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
return true;
@@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
Offset += V->getSExtValue();
return true;
}
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
Offset += V->getSExtValue();
return true;
}
@@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
return false;
}
-
-SDValue TargetLowering::
-PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
return SDValue();
}
@@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
return C_Unknown;
}
-/// LowerXConstraint - try to replace an X constraint, which matches anything,
-/// with another that has more specific requirements based on the type of the
-/// corresponding operand.
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
if (ConstraintVT.isInteger())
return "r";
@@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
return nullptr;
}
-/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops.
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
//===----------------------------------------------------------------------===//
// Constraint Selection.
-/// isMatchingInputConstraint - Return true of this is an input operand that is
-/// a matching constraint like "4".
+/// Return true if this is an input operand that is a matching constraint like
+/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
-/// getMatchedOperand - If this is an input matching constraint, this method
-/// returns the output operand it matches.
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return atoi(ConstraintCode.c_str());
}
-
-/// ParseConstraints - Split up the constraint string from the inline
-/// assembly value into the specific constraints and their prefixes,
-/// and also tie in the associated operand values.
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
ImmutableCallSite CS) const {
- /// ConstraintOperands - Information about all of the constraints.
+ /// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
@@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
" incompatible type!");
}
}
-
}
}
return ConstraintOperands;
}
-
-/// getConstraintGenerality - Return an integer indicating how general CT
-/// is.
+/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Other:
@@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight
return weight;
}
-/// ChooseConstraint - If there are multiple different constraints that we
-/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
/// Other -> immediates and magic values
/// Register -> one specific register
@@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
OpInfo.ConstraintType = BestType;
}
-/// ComputeConstraintToUse - Determines the constraint code and constraint
-/// type to use for the specific AsmOperandInfo, setting
-/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
SelectionDAG *DAG) const {
@@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
return Mul;
}
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
/// \brief Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to the address of TLS variable xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ if (!EmuTlsVar)
+ EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
+ VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+ // calls. At least for X86 targets; maybe good for other targets too?
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true); // Is this only for X86 target?
+ MFI->setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}
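The new LowerToTLSEmulatedModel above rewrites a TLS address computation into a runtime call. Roughly what that amounts to at the source level, as a hedged C-style sketch; the real control variable is named "__emutls_v.xyz", which is not a valid C identifier, so a stand-in name is used here:

  /* Illustrative only: for "__thread int xyz;", emulated TLS references a */
  /* control variable and asks the runtime for this thread's copy.         */
  extern void *__emutls_get_address(void *control);
  extern char emutls_v_xyz[];             /* stand-in for "__emutls_v.xyz" */

  static int *address_of_xyz(void) {
    return (int *)__emutls_get_address(emutls_v_xyz);
  }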
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index e7b2a8e..878eeee 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -112,7 +112,7 @@ public:
case 1:
// Find all 'return', 'resume', and 'unwind' instructions.
while (StateBB != StateE) {
- BasicBlock *CurBB = StateBB++;
+ BasicBlock *CurBB = &*StateBB++;
// Branches and invokes do not escape, only unwind, resume, and return
// do.
@@ -120,7 +120,7 @@ public:
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
- Builder.SetInsertPoint(TI->getParent(), TI);
+ Builder.SetInsertPoint(TI);
return &Builder;
}
@@ -163,8 +163,8 @@ public:
// Split the basic block containing the function call.
BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB =
- CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+ BasicBlock *NewBB = CallBB->splitBasicBlock(
+ CI->getIterator(), CallBB->getName() + ".cont");
// Remove the unconditional branch inserted at the end of CallBB.
CallBB->getInstList().pop_back();
@@ -184,7 +184,7 @@ public:
delete CI;
}
- Builder.SetInsertPoint(RI->getParent(), RI);
+ Builder.SetInsertPoint(RI);
return &Builder;
}
}
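Several hunks in this patch (SelectionDAGISel, ShadowStackGCLowering, SjLjEHPrepare) spell out the iterator-to-pointer conversion as &*It. A minimal sketch of the idiom under the ilist-iterator changes of this period; the loop body is illustrative only:

  #include "llvm/IR/Function.h"

  static void touchEveryBlock(llvm::Function &F) {
    for (llvm::Function::iterator It = F.begin(), E = F.end(); It != E;) {
      // An ilist iterator no longer converts implicitly to the element
      // pointer, so dereference and take the address explicitly.
      llvm::BasicBlock *BB = &*It++;
      (void)BB; // ... process BB ...
    }
  }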
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index 4463cc7..f8aa1e2 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -43,9 +43,11 @@
// points must be in the same loop.
// Property #3 is ensured via the MachineBlockFrequencyInfo.
//
-// If this pass found points matching all this properties, then
-// MachineFrameInfo is updated this that information.
+// If this pass found points matching all these properties, then
+// MachineFrameInfo is updated with this information.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
// To check for profitability.
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -61,11 +63,14 @@
#include "llvm/CodeGen/Passes.h"
// To know about callee-saved.
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
// To query the target about frame lowering.
#include "llvm/Target/TargetFrameLowering.h"
// To know about frame setup operation.
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
// To access TargetInstrInfo.
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -78,6 +83,10 @@ STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
STATISTIC(NumCandidatesDropped,
"Number of shrink-wrapping candidates dropped because of frequency");
+static cl::opt<cl::boolOrDefault>
+ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
+
namespace {
/// \brief Class to determine where the safe point to insert the
/// prologue and epilogue are.
@@ -113,18 +122,38 @@ class ShrinkWrap : public MachineFunctionPass {
unsigned FrameDestroyOpcode;
/// Entry block.
const MachineBasicBlock *Entry;
+ typedef SmallSetVector<unsigned, 16> SetOfRegs;
+ /// Registers that need to be saved for the current function.
+ mutable SetOfRegs CurrentCSRs;
+ /// Current MachineFunction.
+ MachineFunction *MachineFunc;
/// \brief Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
- bool useOrDefCSROrFI(const MachineInstr &MI) const;
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+
+ const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
+ if (CurrentCSRs.empty()) {
+ BitVector SavedRegs;
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+
+ TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS);
+
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ CurrentCSRs.insert((unsigned)Reg);
+ }
+ return CurrentCSRs;
+ }
/// \brief Update the Save and Restore points such that \p MBB is in
/// the region that is dominated by Save and post-dominated by Restore
/// and Save and Restore still match the safe point definition.
/// Such point may not exist and Save and/or Restore may be null after
/// this call.
- void updateSaveRestorePoints(MachineBasicBlock &MBB);
+ void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
/// \brief Initialize the pass for \p MF.
void init(MachineFunction &MF) {
@@ -140,6 +169,8 @@ class ShrinkWrap : public MachineFunctionPass {
FrameSetupOpcode = TII.getCallFrameSetupOpcode();
FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
Entry = &MF.front();
+ CurrentCSRs.clear();
+ MachineFunc = &MF;
++NumFunc;
}
@@ -148,6 +179,9 @@ class ShrinkWrap : public MachineFunctionPass {
/// shrink-wrapping.
bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+ /// \brief Check if shrink wrapping is enabled for this target and function.
+ static bool isShrinkWrapEnabled(const MachineFunction &MF);
+
public:
static char ID;
@@ -185,27 +219,34 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
-bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const {
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
+ RegScavenger *RS) const {
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;
}
for (const MachineOperand &MO : MI.operands()) {
- bool UseCSR = false;
+ bool UseOrDefCSR = false;
if (MO.isReg()) {
unsigned PhysReg = MO.getReg();
if (!PhysReg)
continue;
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Unallocated register?!");
- UseCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ } else if (MO.isRegMask()) {
+ // Check if this regmask clobbers any of the CSRs.
+ for (unsigned Reg : getCurrentCSRs(RS)) {
+ if (MO.clobbersPhysReg(Reg)) {
+ UseOrDefCSR = true;
+ break;
+ }
+ }
}
- // TODO: Handle regmask more accurately.
- // For now, be conservative about them.
- if (UseCSR || MO.isFI() || MO.isRegMask()) {
- DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI()
- << "): " << MI << '\n');
+ if (UseOrDefCSR || MO.isFI()) {
+ DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
+ << MO.isFI() << "): " << MI << '\n');
return true;
}
}
@@ -225,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
return IDom;
}
-void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
+void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
+ RegScavenger *RS) {
// Get rid of the easy cases first.
if (!Save)
Save = &MBB;
@@ -246,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
- if (!useOrDefCSROrFI(Terminator))
+ if (!useOrDefCSROrFI(Terminator, RS))
continue;
// One of the terminator needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -277,7 +319,24 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
while (Save && Restore &&
(!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
!(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
- MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) {
+ // Post-dominance is not enough in loops to ensure that all uses/defs
+ // are after the prologue and before the epilogue at runtime.
+ // E.g.,
+ // while(1) {
+ // Save
+ // Restore
+ // if (...)
+ // break;
+ // use/def CSRs
+ // }
+ // All the uses/defs of CSRs are dominated by Save and post-dominated
+ // by Restore. However, the CSR uses are still reachable after
+ // Restore and before Save are executed.
+ //
+ // For now, just push the restore/save points outside of loops.
+ // FIXME: Refine the criteria to still find interesting cases
+ // for loops.
+ MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
// Fix (A).
if (!SaveDominatesRestore) {
Save = MDT->findNearestCommonDominator(Save, Restore);
@@ -288,35 +347,72 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
Restore = MPDT->findNearestCommonDominator(Restore, Save);
// Fix (C).
- if (Save && Restore && Save != Restore &&
- MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) {
- if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore))
- // Push Save outside of this loop.
- Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
- else
+ if (Save && Restore &&
+ (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
+ // Push Save outside of this loop if the immediate dominator differs from
+ // the save block; if they are the same, bail out.
+ MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (IDom != Save)
+ Save = IDom;
+ else {
+ Save = nullptr;
+ break;
+ }
+ } else {
+ // If the loop does not exit, there is no point in looking
+ // for a post-dominator outside the loop.
+ SmallVector<MachineBasicBlock*, 4> ExitBlocks;
+ MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks);
// Push Restore outside of this loop.
- Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ // Look for the immediate post-dominator of the loop exits.
+ MachineBasicBlock *IPdom = Restore;
+ for (MachineBasicBlock *LoopExitBB: ExitBlocks) {
+ IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT);
+ if (!IPdom)
+ break;
+ }
+ // If the immediate post-dominator is not in a less nested loop,
+ // then we are stuck in a program with an infinite loop.
+ // In that case, we will not find a safe point, hence, bail out.
+ if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore))
+ Restore = IPdom;
+ else {
+ Restore = nullptr;
+ break;
+ }
+ }
}
}
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (MF.empty())
+ if (MF.empty() || !isShrinkWrapEnabled(MF))
return false;
+
DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
init(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
for (MachineBasicBlock &MBB : MF) {
DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
<< '\n');
+ if (MBB.isEHFuncletEntry()) {
+ DEBUG(dbgs() << "EH Funclets are not supported yet.\n");
+ return false;
+ }
+
for (const MachineInstr &MI : MBB) {
- if (!useOrDefCSROrFI(MI))
+ if (!useOrDefCSROrFI(MI, RS.get()))
continue;
// Save (resp. restore) point must dominate (resp. post dominate)
// MI. Look for the proper basic block for those.
- updateSaveRestorePoints(MBB);
+ updateSaveRestorePoints(MBB, RS.get());
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
@@ -368,7 +464,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
break;
NewBB = Restore;
}
- updateSaveRestorePoints(*NewBB);
+ updateSaveRestorePoints(*NewBB, RS.get());
} while (Save && Restore);
if (!ArePointsInteresting()) {
@@ -386,3 +482,30 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
++NumCandidates;
return false;
}
+
+bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ switch (EnableShrinkWrapOpt) {
+ case cl::BOU_UNSET:
+ return TFI->enableShrinkWrapping(MF) &&
+ // Windows with CFI has some limitations that make it impossible
+ // to use shrink-wrapping.
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ // Sanitizers look at the value of the stack at the location
+ // of the crash. Since a crash can happen anywhere, the
+ // frame must be lowered before anything else happens for the
+ // sanitizers to be able to get a correct stack frame.
+ !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+ // If EnableShrinkWrap is set, it takes precedence over whatever the
+ // target sets. The rationale is that we assume we want to test
+ // something related to shrink-wrapping.
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Invalid shrink-wrapping state");
+}
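
A minimal standalone sketch of the tri-state override that the new isShrinkWrapEnabled() above implements: an unset flag defers to the target (and refuses sanitized functions), while an explicit true or false wins outright. BoolOrDefault, the two bool parameters, and main() are illustrative stand-ins; the real code uses a cl::opt<cl::boolOrDefault> and the target/attribute queries shown in the hunk.

#include <cstdio>

enum class BoolOrDefault { Unset, True, False };

// TargetSaysYes stands in for TFI->enableShrinkWrapping(MF) plus the
// Windows-CFI check; Sanitized stands in for the Sanitize* attributes.
bool shrinkWrapEnabled(BoolOrDefault Flag, bool TargetSaysYes, bool Sanitized) {
  switch (Flag) {
  case BoolOrDefault::Unset:
    // No explicit request: defer to the target, but never shrink-wrap a
    // sanitized function, whose frame must be lowered before anything else.
    return TargetSaysYes && !Sanitized;
  case BoolOrDefault::True:    // explicit enable wins over the target
    return true;
  case BoolOrDefault::False:   // explicit disable wins over the target
    return false;
  }
  return false;
}

int main() {
  std::printf("%d\n", shrinkWrapEnabled(BoolOrDefault::Unset, true, false)); // 1
  std::printf("%d\n", shrinkWrapEnabled(BoolOrDefault::Unset, true, true));  // 0
  std::printf("%d\n", shrinkWrapEnabled(BoolOrDefault::False, true, false)); // 0
  return 0;
}
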
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index d236e1f..e1f242a 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -50,7 +50,7 @@ class SjLjEHPrepare : public FunctionPass {
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
- Constant *BuiltinSetjmpFn;
+ Constant *BuiltinSetupDispatchFn;
Constant *FrameAddrFn;
Constant *StackAddrFn;
Constant *StackRestoreFn;
@@ -112,7 +112,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
- BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
@@ -178,8 +179,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
// values and replace the LPI with that aggregate.
Type *LPadType = LPI->getType();
Value *LPadVal = UndefValue::get(LPadType);
- IRBuilder<> Builder(
- std::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ auto *SelI = cast<Instruction>(SelVal);
+ IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator()));
LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
@@ -190,7 +191,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
/// it with all of the data that we know at this point.
Value *SjLjEHPrepare::setupFunctionContext(Function &F,
ArrayRef<LandingPadInst *> LPads) {
- BasicBlock *EntryBB = F.begin();
+ BasicBlock *EntryBB = &F.front();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
@@ -198,12 +199,13 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
auto &DL = F.getParent()->getDataLayout();
unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
- EntryBB->begin());
+ &EntryBB->front());
// Fill in the function context structure.
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
- IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+ IRBuilder<> Builder(LPI->getParent(),
+ LPI->getParent()->getFirstInsertionPt());
// Reference the __data field.
Value *FCData =
@@ -250,21 +252,20 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
++AfterAllocaInsPt;
+ assert(AfterAllocaInsPt != F.front().end());
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
- ++AI) {
- Type *Ty = AI->getType();
+ for (auto &AI : F.args()) {
+ Type *Ty = AI.getType();
// Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction.
Value *TrueValue = ConstantInt::getTrue(F.getContext());
Value *UndefValue = UndefValue::get(Ty);
- Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue,
- AI->getName() + ".tmp",
- AfterAllocaInsPt);
- AI->replaceAllUsesWith(SI);
+ Instruction *SI = SelectInst::Create(
+ TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt);
+ AI.replaceAllUsesWith(SI);
// Reset the operand, because it was clobbered by the RAUW above.
- SI->setOperand(1, AI);
+ SI->setOperand(1, &AI);
}
}
@@ -279,7 +280,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
// Ignore obvious cases we don't have to handle. In particular, most
// instructions either have no uses or only have a single use inside the
// current block. Ignore them quickly.
- Instruction *Inst = II;
+ Instruction *Inst = &*II;
if (Inst->use_empty())
continue;
if (Inst->hasOneUse() &&
@@ -360,7 +361,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
DemotePHIToStack(PN);
// Move the landingpad instruction back to the top of the landing pad block.
- LPI->moveBefore(UnwindBlock->begin());
+ LPI->moveBefore(&UnwindBlock->front());
}
}
@@ -400,7 +401,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Value *FuncCtx =
setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
- BasicBlock *EntryBB = F.begin();
+ BasicBlock *EntryBB = &F.front();
IRBuilder<> Builder(EntryBB->getTerminator());
// Get a reference to the jump buffer.
@@ -421,9 +422,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Val = Builder.CreateCall(StackAddrFn, {}, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
- Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
+ // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
+ Builder.CreateCall(BuiltinSetupDispatchFn, {});
// Store a pointer to the function context so that the back-end will know
// where to look for it.
@@ -475,7 +475,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
continue;
}
Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(I);
+ StackAddr->insertAfter(&*I);
Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
StoreStackAddr->insertAfter(StackAddr);
}
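
Much of the mechanical churn in the hunks above and below (F.begin() becoming &F.front(), listEntry()->getIterator(), &*I, &*MFI) comes from intrusive-list iterators no longer converting implicitly to node pointers. A plain std::list sketch, with a made-up element type, just to show the &*It idiom the new code spells out explicitly:

#include <cassert>
#include <iterator>
#include <list>

struct BlockLike { int Number; };

int main() {
  std::list<BlockLike> Blocks{{0}, {1}, {2}};

  // Iterator -> element pointer: now written explicitly as &*It.
  std::list<BlockLike>::iterator It = Blocks.begin();
  BlockLike *First = &*It;
  assert(First->Number == 0);

  // Element -> iterator: in LLVM this is Node->getIterator(); with
  // std::list the closest equivalent is stepping from a known position.
  std::list<BlockLike>::iterator Second = std::next(Blocks.begin());
  assert((&*Second)->Number == 1);
  return 0;
}
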
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index 025ae70..c9d23f6 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -172,8 +172,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
// optionally includes an additional position prior to MBB->begin(), indicated
// by the includeStart flag. This is done so that we can iterate MIs in a MBB
// in parallel with SlotIndexes, but there should be a better way to do this.
- IndexList::iterator ListB = startIdx.listEntry();
- IndexList::iterator ListI = endIdx.listEntry();
+ IndexList::iterator ListB = startIdx.listEntry()->getIterator();
+ IndexList::iterator ListI = endIdx.listEntry()->getIterator();
MachineBasicBlock::iterator MBBI = End;
bool pastStart = false;
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
index 97a5424..d30cfc2 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -36,7 +36,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -188,9 +187,9 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
BlockFrequencies.resize(mf.getNumBlockIDs());
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
setThreshold(MBFI->getEntryFreq());
- for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
- unsigned Num = I->getNumber();
- BlockFrequencies[Num] = MBFI->getBlockFreq(I);
+ for (auto &I : mf) {
+ unsigned Num = I.getNumber();
+ BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
}
// We never change the function.
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index dab1dfe..51dddab 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -56,6 +56,7 @@ void SplitAnalysis::clear() {
SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ // FIXME: Handle multiple EH pad successors.
const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
@@ -176,10 +177,11 @@ bool SplitAnalysis::calcLiveBlockInfo() {
UseE = UseSlots.end();
// Loop over basic blocks where CurLI is live.
- MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ MachineFunction::iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
for (;;) {
BlockInfo BI;
- BI.MBB = MFI;
+ BI.MBB = &*MFI;
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
@@ -259,7 +261,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
if (LVI->start < Stop)
++MFI;
else
- MFI = LIS.getMBBFromIndex(LVI->start);
+ MFI = LIS.getMBBFromIndex(LVI->start)->getIterator();
}
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
@@ -275,8 +277,9 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
unsigned Count = 0;
// Loop over basic blocks where li is live.
- MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
- SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ MachineFunction::const_iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
for (;;) {
++Count;
LVI = li->advanceTo(LVI, Stop);
@@ -284,7 +287,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
return Count;
do {
++MFI;
- Stop = LIS.getMBBEndIdx(MFI);
+ Stop = LIS.getMBBEndIdx(&*MFI);
} while (Stop <= LVI->start);
}
}
@@ -864,9 +867,9 @@ bool SplitEditor::transferValues() {
// This value has multiple defs in RegIdx, but it wasn't rematerialized,
// so the live range is accurate. Add live-in blocks in [Start;End) to the
// LiveInBlocks.
- MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex BlockStart, BlockEnd;
- std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+ std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB);
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
@@ -875,7 +878,7 @@ bool SplitEditor::transferValues() {
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
if (BlockEnd <= End)
- LRC.setLiveOutValue(MBB, VNI);
+ LRC.setLiveOutValue(&*MBB, VNI);
// Skip to the next block for live-in.
++MBB;
@@ -886,23 +889,23 @@ bool SplitEditor::transferValues() {
assert(Start <= BlockStart && "Expected live-in block");
while (BlockStart < End) {
DEBUG(dbgs() << ">BB#" << MBB->getNumber());
- BlockEnd = LIS.getMBBEndIdx(MBB);
+ BlockEnd = LIS.getMBBEndIdx(&*MBB);
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
- LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
+ LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
} else {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LR, MDT[MBB], End);
+ LRC.addLiveInBlock(LR, MDT[&*MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LR, MDT[MBB]);
- LRC.setLiveOutValue(MBB, nullptr);
+ LRC.addLiveInBlock(LR, MDT[&*MBB]);
+ LRC.setLiveOutValue(&*MBB, nullptr);
}
}
BlockStart = BlockEnd;
@@ -1081,16 +1084,14 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
ConnectedVNInfoEqClasses ConEQ(LIS);
for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
// Don't use iterators, they are invalidated by create() below.
- LiveInterval *li = &LIS.getInterval(Edit->get(i));
- unsigned NumComp = ConEQ.Classify(li);
- if (NumComp <= 1)
- continue;
- DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
- SmallVector<LiveInterval*, 8> dups;
- dups.push_back(li);
- for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->createEmptyInterval());
- ConEQ.Distribute(&dups[0], MRI);
+ unsigned VReg = Edit->get(i);
+ LiveInterval &LI = LIS.getInterval(VReg);
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ unsigned Original = VRM.getOriginal(VReg);
+ for (LiveInterval *SplitLI : SplitLIs)
+ VRM.setIsSplitFromReg(SplitLI->reg, Original);
+
// The new intervals all map back to i.
if (LRMap)
LRMap->resize(Edit->size(), i);
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index 116eef6..b3cd8b3 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -94,7 +94,9 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
default:
llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
+ auto &DL = AP.MF->getDataLayout();
+
+ unsigned Size = DL.getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index bcea37a..db3fef5 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -373,7 +373,7 @@ bool StackProtector::InsertStackProtectors() {
Value *StackGuardVar = nullptr; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RI)
continue;
@@ -433,7 +433,7 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+ BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
@@ -453,22 +453,20 @@ bool StackProtector::InsertStackProtectors() {
LoadInst *LI1 = B.CreateLoad(StackGuardVar);
LoadInst *LI2 = B.CreateLoad(AI);
Value *Cmp = B.CreateICmpEQ(LI1, LI2);
- unsigned SuccessWeight =
- BranchProbabilityInfo::getBranchWeightStackProtector(true);
- unsigned FailureWeight =
- BranchProbabilityInfo::getBranchWeightStackProtector(false);
+ auto SuccessProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(false);
MDNode *Weights = MDBuilder(F->getContext())
- .createBranchWeights(SuccessWeight, FailureWeight);
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
}
}
// Return if we didn't modify any basic blocks. i.e., there are no return
// statements in the function.
- if (!HasPrologue)
- return false;
-
- return true;
+ return HasPrologue;
}
/// CreateFailBB - Create a basic block to jump to when the stack protector
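
The StackProtector change above swaps two raw branch weights for BranchProbability values and feeds their numerators to createBranchWeights(). The stand-in below shows the shape of that conversion; the Probability struct and the 2^-20 failure mass are assumptions for illustration, not LLVM's exact constants.

#include <cstdint>
#include <cstdio>

struct Probability {
  static constexpr uint32_t Denominator = 1u << 31; // fixed-point scale
  uint32_t Numerator;
};

// The guard comparison almost always succeeds, so nearly all of the
// probability mass goes to the success edge.
Probability stackProtectorBranchProb(bool Success) {
  uint32_t Tiny = Probability::Denominator >> 20;
  return Probability{Success ? Probability::Denominator - Tiny : Tiny};
}

int main() {
  Probability SuccessProb = stackProtectorBranchProb(true);
  Probability FailureProb = stackProtectorBranchProb(false);
  // These two numerators are what the branch_weights metadata receives.
  std::printf("weights: %u %u\n", SuccessProb.Numerator, FailureProb.Numerator);
  return 0;
}
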
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index a5a175f..51f4d0e 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -318,7 +318,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
RefMMOs[i]->setValue(NewSV);
diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
index 95dfd75..3f60e18 100644
--- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
+++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
@@ -34,9 +34,9 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
- Optional<bool> isGCManagedPointer(const Value *V) const override {
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
- PointerType *PT = cast<PointerType>(V->getType());
+ const PointerType *PT = cast<PointerType>(Ty);
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does. Note that addrspace(1) is used
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 237460c..d2fbf53 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -59,7 +59,7 @@ TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
namespace {
- /// TailDuplicatePass - Perform tail duplication.
+ /// Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -69,11 +69,11 @@ namespace {
std::unique_ptr<RegScavenger> RS;
bool PreRegAlloc;
- // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ // A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
- // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
- // source virtual registers.
+ // For each virtual register in SSAUpdateVals keep a list of source virtual
+ // registers.
DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
public:
@@ -161,7 +161,7 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
MBB->pred_end());
MachineBasicBlock::iterator MI = MBB->begin();
@@ -207,7 +207,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
-/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+/// Tail duplicate the block and cleanup.
bool
TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
bool IsSimple,
@@ -310,9 +310,9 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
return true;
}
-/// TailDuplicateBlocks - Look for small blocks that are unconditionally
-/// branched to and do not fall through. Tail-duplicate their instructions
-/// into their predecessors to eliminate (dynamic) branches.
+/// Look for small blocks that are unconditionally branched to and do not fall
+/// through. Tail-duplicate their instructions into their predecessors to
+/// eliminate (dynamic) branches.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
bool MadeChange = false;
@@ -322,7 +322,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
}
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
if (NumTails == TailDupLimit)
break;
@@ -375,8 +375,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
}
}
-/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
-/// SSA update.
+/// Add a definition and source virtual registers pair for SSA update.
void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
MachineBasicBlock *BB) {
DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
@@ -390,9 +389,8 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
}
}
-/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
-/// Remember the source register that's contributed by PredBB and update SSA
-/// update map.
+/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
+/// source register that's contributed by PredBB and update SSA update map.
void TailDuplicatePass::ProcessPHI(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
@@ -422,7 +420,7 @@ void TailDuplicatePass::ProcessPHI(
MI->eraseFromParent();
}
-/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// Duplicate a TailBB instruction to PredBB and update
/// the source operands due to earlier PHI translation.
void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MachineBasicBlock *TailBB,
@@ -459,9 +457,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
PredBB->insert(PredBB->instr_end(), NewMI);
}
-/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
-/// blocks, the successors have gained new predecessors. Update the PHI
-/// instructions in them accordingly.
+/// After FromBB is tail duplicated into its predecessor blocks, the successors
+/// have gained new predecessors. Update the PHI instructions in them
+/// accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
@@ -545,7 +543,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
}
}
-/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+/// Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool IsSimple,
@@ -563,6 +561,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ // FIXME: Use Function::optForSize().
MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
@@ -584,30 +583,51 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
+ for (MachineInstr &MI : TailBB) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->isNotDuplicable())
+ if (MI.isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->isReturn())
+ if (PreRegAlloc && MI.isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls present a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->isCall())
+ if (PreRegAlloc && MI.isCall())
return false;
- if (!I->isPHI() && !I->isDebugValue())
+ if (!MI.isPHI() && !MI.isDebugValue())
InstrCount += 1;
if (InstrCount > MaxDuplicateCount)
return false;
}
+ // Check if any of the successors of TailBB has a PHI node in which the
+ // value corresponding to TailBB uses a subregister.
+ // If a phi node uses a register paired with a subregister, the actual
+ // "value type" of the phi may differ from the type of the register without
+ // any subregisters. Due to a bug, tail duplication may add a new operand
+ // without a necessary subregister, producing invalid code. This is
+ // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
+ // Disable tail duplication for this case for now, until the problem is
+ // fixed.
+ for (auto SB : TailBB.successors()) {
+ for (auto &I : *SB) {
+ if (!I.isPHI())
+ break;
+ unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
+ assert(Idx != 0);
+ MachineOperand &PU = I.getOperand(Idx);
+ if (PU.getSubReg() != 0)
+ return false;
+ }
+ }
+
if (HasIndirectbr && PreRegAlloc)
return true;
@@ -620,7 +640,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
return canCompletelyDuplicateBB(TailBB);
}
-/// isSimpleBB - True if this BB has only one unconditional jump.
+/// True if this BB has only one unconditional jump.
bool
TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
if (TailBB->succ_size() != 1)
@@ -636,22 +656,16 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
static bool
bothUsedInPHI(const MachineBasicBlock &A,
SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
- for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
- SE = A.succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *BB = *SI;
+ for (MachineBasicBlock *BB : A.successors())
if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
return true;
- }
return false;
}
bool
TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
- for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
- PE = BB.pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : BB.predecessors()) {
if (PredBB->succ_size() > 1)
return false;
@@ -680,7 +694,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
PE = Preds.end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
- if (PredBB->getLandingPadSuccessor())
+ if (PredBB->hasEHPadSuccessor())
continue;
if (bothUsedInPHI(*PredBB, Succs))
@@ -696,7 +710,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
<< "From simple Succ: " << *TailBB);
MachineBasicBlock *NewTarget = *TailBB->succ_begin();
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB));
+ MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
// Make PredFBB explicit.
if (PredCond.empty())
@@ -731,19 +745,19 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
if (PredTBB)
TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
- uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB);
- PredBB->removeSuccessor(TailBB);
- unsigned NumSuccessors = PredBB->succ_size();
- assert(NumSuccessors <= 1);
- if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
- PredBB->addSuccessor(NewTarget, Weight);
+ if (!PredBB->isSuccessor(NewTarget))
+ PredBB->replaceSuccessor(TailBB, NewTarget);
+ else {
+ PredBB->removeSuccessor(TailBB, true);
+ assert(PredBB->succ_size() <= 1);
+ }
TDBBs.push_back(PredBB);
}
return Changed;
}
-/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
@@ -798,13 +812,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
RS->enterBasicBlock(PredBB);
if (!PredBB->empty())
RS->forward(std::prev(PredBB->end()));
- for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
- E = TailBB->livein_end(); I != E; ++I) {
- if (!RS->isRegUsed(*I, false))
+ for (const auto &LI : TailBB->liveins()) {
+ if (!RS->isRegUsed(LI.PhysReg, false))
// If a register is previously livein to the tail but it's not live
// at the end of predecessor BB, then it should be added to its
// livein list.
- PredBB->addLiveIn(*I);
+ PredBB->addLiveIn(LI);
}
}
@@ -845,7 +858,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
"TailDuplicate called on block with multiple successors!");
for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
E = TailBB->succ_end(); I != E; ++I)
- PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I));
+ PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
Changed = true;
++NumTailDups;
@@ -854,7 +867,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
@@ -960,8 +973,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
return Changed;
}
-/// RemoveDeadBlock - Remove the specified dead machine basic block from the
-/// function, updating the CFG.
+/// Remove the specified dead machine basic block from the function, updating
+/// the CFG.
void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
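
A rough standalone model of the profitability gate in shouldTailDuplicate() above: count the non-PHI, non-debug instructions, refuse calls and returns before register allocation, and shrink the budget to a single instruction when optimizing for size. Instr and the default limit of 2 are illustrative; the real pass takes its limit from the tail-dup-size option and the target hook.

#include <vector>

struct Instr { bool IsPHI = false, IsDebug = false, IsCall = false, IsReturn = false; };

bool worthDuplicating(const std::vector<Instr> &Tail, bool OptForSize,
                      bool PreRegAlloc, unsigned Limit = 2) {
  if (OptForSize)
    Limit = 1; // duplication grows code, so be stingy when optimizing for size
  unsigned Count = 0;
  for (const Instr &I : Tail) {
    // Before register allocation, calls and returns may later expand into
    // many more instructions (spills, callee-save reloads), so skip them.
    if (PreRegAlloc && (I.IsCall || I.IsReturn))
      return false;
    if (!I.IsPHI && !I.IsDebug)
      ++Count;
    if (Count > Limit)
      return false;
  }
  return true;
}

int main() {
  std::vector<Instr> Tail{{}, {}};  // two ordinary instructions
  bool A = worthDuplicating(Tail, /*OptForSize=*/false, /*PreRegAlloc=*/true); // true
  bool B = worthDuplicating(Tail, /*OptForSize=*/true,  /*PreRegAlloc=*/true); // false
  return (A && !B) ? 0 : 1;
}
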
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f3cccd8..679ade1 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -32,25 +33,22 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
return Attr.getValueAsString() == "true";
}
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
+/// Returns the displacement from the frame register to the stack
+/// frame of the specified index, along with the frame register used
+/// (in output arg FrameReg). This is the default implementation which
+/// is overridden for some targets.
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
// getFrameRegister() says. The target can override this if it's doing
// something different.
FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
+
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
}
bool TargetFrameLowering::needsFrameIndexResolution(
@@ -84,3 +82,13 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
}
}
+
+unsigned TargetFrameLowering::getStackAlignmentSkew(
+ const MachineFunction &MF) const {
+ // When an HHVM function is called, the stack is skewed as the return address
+ // is removed from the stack before we enter the function.
+ if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM))
+ return MF.getTarget().getPointerSize();
+
+ return 0;
+}
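
The merged getFrameIndexReference() above computes the default displacement inline. As a worked example of that arithmetic (the numbers are invented purely for illustration):

#include <cstdio>

// offset = ObjectOffset + StackSize - OffsetOfLocalArea + OffsetAdjustment
int frameIndexOffset(int ObjectOffset, int StackSize, int OffsetOfLocalArea,
                     int OffsetAdjustment) {
  return ObjectOffset + StackSize - OffsetOfLocalArea + OffsetAdjustment;
}

int main() {
  // An object placed at offset -8 in a 32-byte frame, with no local-area
  // offset and no adjustment, sits 24 bytes above the frame register.
  std::printf("%d\n", frameIndexOffset(-8, 32, 0, 0)); // 24
  return 0;
}

The new getStackAlignmentSkew() hook feeds a related correction for HHVM, where the return address has already been popped on entry, so the incoming stack starts one pointer size away from its usual alignment.
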
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 97ca025..6eaf991 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -118,23 +118,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MBB->addSuccessor(NewDest);
}
-// commuteInstruction - The default implementation of this method just exchanges
-// the two operands returned by findCommutedOpIndices.
-MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
+MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned Idx1,
+ unsigned Idx2) const {
const MCInstrDesc &MCID = MI->getDesc();
bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
// No idea how to commute this instruction. Target should implement its own.
return nullptr;
- unsigned Idx1, Idx2;
- if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
- assert(MI->isCommutable() && "Precondition violation: MI must be commutable.");
- return nullptr;
- }
+ unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
+ unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+ assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
+ CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
+ "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
+
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
@@ -184,9 +185,53 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
-/// findCommutedOpIndices - If specified MI is commutable, return the two
-/// operand indices that would swap value. Return true if the instruction
-/// is not in a form which this routine understands.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
+ // any commutable operand, which is done in findCommutedOpIndices() method
+ // called below.
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
+ !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
+ assert(MI->isCommutable() &&
+ "Precondition violation: MI must be commutable.");
+ return nullptr;
+ }
+ return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
+bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
+ unsigned &ResultIdx2,
+ unsigned CommutableOpIdx1,
+ unsigned CommutableOpIdx2) {
+ if (ResultIdx1 == CommuteAnyOperandIndex &&
+ ResultIdx2 == CommuteAnyOperandIndex) {
+ ResultIdx1 = CommutableOpIdx1;
+ ResultIdx2 = CommutableOpIdx2;
+ } else if (ResultIdx1 == CommuteAnyOperandIndex) {
+ if (ResultIdx2 == CommutableOpIdx1)
+ ResultIdx1 = CommutableOpIdx2;
+ else if (ResultIdx2 == CommutableOpIdx2)
+ ResultIdx1 = CommutableOpIdx1;
+ else
+ return false;
+ } else if (ResultIdx2 == CommuteAnyOperandIndex) {
+ if (ResultIdx1 == CommutableOpIdx1)
+ ResultIdx2 = CommutableOpIdx2;
+ else if (ResultIdx1 == CommutableOpIdx2)
+ ResultIdx2 = CommutableOpIdx1;
+ else
+ return false;
+ } else
+ // Check that the result operand indices match the given commutable
+ // operand indices.
+ return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) ||
+ (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1);
+
+ return true;
+}
+
bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
@@ -196,10 +241,15 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
+
// This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
// is not true, then the target must implement this.
- SrcOpIdx1 = MCID.getNumDefs();
- SrcOpIdx2 = SrcOpIdx1 + 1;
+ unsigned CommutableOpIdx1 = MCID.getNumDefs();
+ unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+
if (!MI->getOperand(SrcOpIdx1).isReg() ||
!MI->getOperand(SrcOpIdx2).isReg())
// No idea.
@@ -207,7 +257,6 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
return true;
}
-
bool
TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
@@ -315,7 +364,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!MF.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!MF.getDataLayout().isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -384,11 +433,6 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
-}
-
static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
@@ -489,10 +533,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
return NewMI;
@@ -517,6 +560,217 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
return --Pos;
}
+bool TargetInstrInfo::hasReassociableOperands(
+ const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions for the operands that we will
+ // reassociate.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
+}
+
+bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
+ bool &Commuted) const {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
+ std::swap(MI1, MI2);
+
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ return MI1->getOpcode() == AssocOpcode &&
+ hasReassociableOperands(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
+}
+
+// 1. The operation must be associative and commutative.
+// 2. The instruction must have virtual register definitions for its
+// operands in the same basic block.
+// 3. The instruction must have a reassociable sibling.
+bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
+ bool &Commuted) const {
+ return isAssociativeAndCommutative(Inst) &&
+ hasReassociableOperands(Inst, Inst.getParent()) &&
+ hasReassociableSibling(Inst, Commuted);
+}
+
+// The concept of the reassociation pass is that these operations can benefit
+// from this kind of transformation:
+//
+// A = ? op ?
+// B = A op X (Prev)
+// C = B op Y (Root)
+// -->
+// A = ? op ?
+// B = X op Y
+// C = A op B
+//
+// breaking the dependency between A and B, allowing them to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
+
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
+bool TargetInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+
+ bool Commute;
+ if (isReassociationCandidate(Root, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Attempt the reassociation transformation to reduce critical path length.
+/// See the above comments before getMachineCombinerPatterns().
+void TargetInstrInfo::reassociateOps(
+ MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ int Row;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
+ case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
+ case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
+ case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
+ default: llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+
+ setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
+
+ // Record new instructions for insertion and old instructions for deletion.
+ InsInstrs.push_back(MIB1);
+ InsInstrs.push_back(MIB2);
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void TargetInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ break;
+ default:
+ break;
+ }
+
+ assert(Prev && "Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
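
The reassociation comments in the hunk above describe the transform abstractly. On plain integers (or any associative, commutative op) the before/after shapes look like this, with identical results but one fewer operation on A's critical path:

#include <cassert>

int before(int A, int X, int Y) {
  int B = A + X; // waits for A
  int C = B + Y; // waits for B: two serial ops once A is ready
  return C;
}

int after(int A, int X, int Y) {
  int B = X + Y; // independent of A, can execute alongside A's producer
  int C = A + B; // only one op left that depends on A
  return C;
}

int main() {
  for (int A : {1, -7, 42})
    assert(before(A, 3, 5) == after(A, 3, 5));
  return 0;
}
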
@@ -661,6 +915,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
return 0;
int SPAdj = MI->getOperand(0).getImm();
+ SPAdj = TFI->alignSPAdjust(SPAdj);
if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) ||
(StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode))
@@ -686,10 +941,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// modification.
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
- return true;
-
- return false;
+ return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
}
// Provide a global flag for disabling the PreRA hazard recognizer that targets
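
One more sketch from the TargetInstrInfo changes above: the index-reconciliation rules in fixCommutedOpIndices() are easier to exercise in isolation. The copy below mirrors that logic, with kAny standing in for CommuteAnyOperandIndex (an illustrative constant, not necessarily the real value).

#include <cassert>

const unsigned kAny = ~0u;

bool fixIndices(unsigned &R1, unsigned &R2, unsigned C1, unsigned C2) {
  if (R1 == kAny && R2 == kAny) {
    R1 = C1; R2 = C2;                      // caller has no preference
  } else if (R1 == kAny) {
    if (R2 == C1)      R1 = C2;            // fill in the partner of R2
    else if (R2 == C2) R1 = C1;
    else               return false;
  } else if (R2 == kAny) {
    if (R1 == C1)      R2 = C2;            // fill in the partner of R1
    else if (R1 == C2) R2 = C1;
    else               return false;
  } else {
    // Both fixed: they must name exactly the commutable pair, in either order.
    return (R1 == C1 && R2 == C2) || (R1 == C2 && R2 == C1);
  }
  return true;
}

int main() {
  unsigned A = kAny, B = 2;
  assert(fixIndices(A, B, 1, 2) && A == 1 && B == 2); // partner filled in
  unsigned X = 3, Y = kAny;
  assert(!fixIndices(X, Y, 1, 2));                    // 3 is not commutable
  return 0;
}
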
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index ecfd659..36a31c9 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -247,13 +247,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
- Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
- Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
- Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
- Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
@@ -266,13 +262,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
- Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
- Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
- Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
@@ -501,10 +493,6 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -512,10 +500,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOSINT_F32_I128;
} else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F64_I32;
if (RetVT == MVT::i64)
@@ -551,10 +535,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -562,10 +542,6 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOUINT_F32_I128;
} else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F64_I32;
if (RetVT == MVT::i64)
@@ -758,17 +734,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
SelectIsExpensive = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
- IntDivIsCheap = false;
FsqrtIsCheap = false;
- Pow2SDivIsCheap = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
@@ -778,6 +750,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
+ GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
InsertFencesForAtomic = false;
MinimumJumpTableEntries = 4;
@@ -814,6 +787,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMINNAN, VT, Expand);
+ setOperationAction(ISD::FMAXNAN, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
@@ -828,6 +803,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
+ setOperationAction(ISD::BITREVERSE, VT, Expand);
+
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
@@ -838,11 +815,17 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
+
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+ // Most targets also ignore the @llvm.readcyclecounter intrinsic.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
@@ -1111,6 +1094,19 @@ MachineBasicBlock*
TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // We're handling multiple types of operands here:
+ // PATCHPOINT MetaArgs - live-in, read only, direct
+ // STATEPOINT Deopt Spill - live-through, read only, indirect
+ // STATEPOINT Deopt Alloca - live-through, read only, direct
+ // (We're currently conservative and mark the deopt slots read/write in
+ // practice.)
+ // STATEPOINT GC Spill - live-through, read/write, indirect
+ // STATEPOINT GC Alloca - live-through, read/write, direct
+ // The live-in vs live-through is handled already (the live through ones are
+ // all stack slots), but we need to handle the different types of stackmap
+ // operands and memory effects here.
// MI changes inside this loop as we grow operands.
for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
@@ -1126,10 +1122,24 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Copy operands before the frame-index.
for (unsigned i = 0; i < OperIdx; ++i)
MIB.addOperand(MI->getOperand(i));
- // Add frame index operands: direct-mem-ref tag, #FI, offset.
- MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.addOperand(MI->getOperand(OperIdx));
- MIB.addImm(0);
+ // Add frame index operands recognized by stackmaps.cpp
+ if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
+ // indirect-mem-ref tag, size, #FI, offset.
+ // Used for spills inserted by StatepointLowering. This codepath is not
+ // used for patchpoints/stackmaps at all; for those, spilling is done via
+ // the foldMemoryOperand callback only.
+ assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(MFI.getObjectSize(FI));
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ } else {
+ // direct-mem-ref tag, #FI, offset.
+ // Used by patchpoint, and direct alloca arguments to statepoints
+ MIB.addImm(StackMaps::DirectMemRefOp);
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ }
// Copy the operands after the frame index.
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
MIB.addOperand(MI->getOperand(i));
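
emitPatchPoint() above now emits two operand layouts that the stackmap parser understands. A tiny sketch of the two shapes, using a plain integer vector in place of the machine operand list and an illustrative enum (the real tag values are defined in StackMaps):

#include <cstdint>
#include <vector>

enum MemRefTag { DirectMemRefOp, IndirectMemRefOp }; // illustrative values only

void addFrameIndexOperands(std::vector<int64_t> &Ops, int64_t FI,
                           int64_t ObjectSize, bool IsStatepointSpillSlot) {
  if (IsStatepointSpillSlot) {
    // indirect-mem-ref: tag, size, frame index, offset
    Ops.push_back(IndirectMemRefOp);
    Ops.push_back(ObjectSize);
    Ops.push_back(FI);
    Ops.push_back(0);
  } else {
    // direct-mem-ref: tag, frame index, offset
    Ops.push_back(DirectMemRefOp);
    Ops.push_back(FI);
    Ops.push_back(0);
  }
}

int main() {
  std::vector<int64_t> Ops;
  addFrameIndexOperands(Ops, /*FI=*/1, /*ObjectSize=*/8, /*spill slot*/ true);
  return Ops.size() == 4 ? 0 : 1;
}
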
@@ -1139,7 +1149,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
unsigned Flags = MachineMemOperand::MOLoad;
@@ -1148,8 +1157,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
Flags |= MachineMemOperand::MOVolatile;
}
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), Flags,
- TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI));
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -1274,20 +1283,14 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
+ // Decide how to handle f16. If the target does not have native f16 support,
+ // promote it to f32, because there are no f16 library calls (except for
+ // conversions).
if (!isTypeLegal(MVT::f16)) {
- // If the target has native f32 support, promote f16 operations to f32. If
- // f32 is not supported, generate soft float library calls.
- if (isTypeLegal(MVT::f32)) {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
- TransformToType[MVT::f16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
- } else {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
- TransformToType[MVT::f16] = MVT::i16;
- ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
- }
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
// Loop over all of the vector value types to see which need transformations.
@@ -1528,6 +1531,29 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
return DL.getABITypeAlignment(Ty);
}
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace,
+ unsigned Alignment,
+ bool *Fast) const {
+ // Check if the specified alignment is sufficient based on the data layout.
+ // TODO: While using the data layout works in practice, a better solution
+ // would be to implement this check directly (make this a virtual function).
+ // For example, the ABI alignment may change based on software platform while
+ // this function should only be affected by hardware implementation.
+ Type *Ty = VT.getTypeForEVT(Context);
+ if (Alignment >= DL.getABITypeAlignment(Ty)) {
+ // Assume that an access that meets the ABI-specified alignment is fast.
+ if (Fast != nullptr)
+ *Fast = true;
+ return true;
+ }
+
+ // This is a misaligned access.
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+}
+
+
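The allowsMemoryAccess hook added above reduces to a two-step decision: an access that meets the ABI type alignment is accepted and assumed fast, and anything below that is delegated to the target's misaligned-access hook. A condensed standalone sketch of that decision follows, where AbiAlign and the allowsMisaligned callback stand in for the DataLayout query and the virtual target hook.

#include <functional>

// Returns true if an access with the given alignment is allowed; *Fast is
// set when the access is also expected to be fast.
static bool allowsMemoryAccessSketch(
    unsigned Alignment, unsigned AbiAlign,
    const std::function<bool(unsigned, bool *)> &allowsMisaligned,
    bool *Fast = nullptr) {
  if (Alignment >= AbiAlign) {
    // Meeting the ABI alignment is assumed to be fast.
    if (Fast)
      *Fast = true;
    return true;
  }
  // Otherwise defer to the target's misaligned-access policy.
  return allowsMisaligned(Alignment, Fast);
}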
//===----------------------------------------------------------------------===//
// TargetTransformInfo Helpers
//===----------------------------------------------------------------------===//
@@ -1546,6 +1572,11 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case Invoke: return 0;
case Resume: return 0;
case Unreachable: return 0;
+ case CleanupRet: return 0;
+ case CatchRet: return 0;
+ case CatchPad: return 0;
+ case CatchSwitch: return 0;
+ case CleanupPad: return 0;
case Add: return ISD::ADD;
case FAdd: return ISD::FADD;
case Sub: return ISD::SUB;
@@ -1603,13 +1634,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
-std::pair<unsigned, MVT>
+std::pair<int, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty->getContext();
EVT MTy = getValueType(DL, Ty);
- unsigned Cost = 1;
+ int Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.
@@ -1622,11 +1653,28 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
Cost *= 2;
+ // Do not loop with f128 type.
+ if (MTy == LK.second)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
// Keep legalizing the type.
MTy = LK.second;
}
}
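The early return added in the hunk above stops the legalization loop when a type maps to itself (as f128 can), which previously could spin forever. Stripped of LLVM types, the routine is a loop that doubles the accumulated cost on every split and keeps following the conversion result until it is legal or stops changing. A simplified sketch with integer "type ids" and a caller-supplied conversion callback in place of getTypeConversion:

#include <functional>
#include <utility>

enum class LegalizeStep { Legal, Split, Promote };
using Conversion = std::pair<LegalizeStep, int>; // (action, resulting type id)

// 'convert' stands in for getTypeConversion; it is supplied by the caller.
static int getTypeLegalizationCostSketch(
    int Ty, const std::function<Conversion(int)> &convert) {
  int Cost = 1;
  while (true) {
    Conversion LK = convert(Ty);
    if (LK.first == LegalizeStep::Legal)
      return Cost;
    if (LK.first == LegalizeStep::Split)
      Cost *= 2;        // each split doubles the number of pieces to handle
    if (Ty == LK.second)
      return Cost;      // the type no longer changes (e.g. f128); stop here
    Ty = LK.second;     // keep legalizing the resulting type
  }
}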
+Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!TM.getTargetTriple().isAndroid())
+ return nullptr;
+
+ // Android provides a libc function to retrieve the address of the current
+ // thread's unsafe stack pointer.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+ Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0), nullptr);
+ return IRB.CreateCall(Fn);
+}
+
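Using the same libc entry point the hunk above declares, the lowered code amounts to calling __safestack_pointer_address() and treating the result as the address of the thread's unsafe stack pointer. A user-level sketch of that access pattern (declaration only; the function itself is provided by Android's bionic, so this does not link elsewhere):

// Provided by Android's libc per the lowering above; declared here only.
extern "C" void **__safestack_pointer_address();

// Read the current thread's unsafe stack pointer by dereferencing the slot
// whose address the libc call returns.
static void *currentUnsafeStackPointer() {
  return *__safestack_pointer_address();
}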
//===----------------------------------------------------------------------===//
// Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2f78763..58ae9cc 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
@@ -32,6 +33,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -58,9 +60,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
report_fatal_error("We do not support this DWARF encoding yet!");
}
-void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
- const TargetMachine &TM,
- const MCSymbol *Sym) const {
+void TargetLoweringObjectFileELF::emitPersonalityValue(
+ MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
MCSymbolELF *Label =
@@ -72,9 +73,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS,
Flags, 0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -232,14 +233,8 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".tdata";
if (Kind.isThreadBSS())
return ".tbss";
- if (Kind.isDataNoRel())
+ if (Kind.isData())
return ".data";
- if (Kind.isDataRelLocal())
- return ".data.rel.local";
- if (Kind.isDataRel())
- return ".data.rel";
- if (Kind.isReadOnlyWithRelLocal())
- return ".data.rel.ro.local";
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
return ".data.rel.ro";
}
@@ -282,8 +277,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
- unsigned Align =
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV));
std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
Name = SizeSpec + utostr(Align);
@@ -350,9 +345,8 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
/// Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
-MCSection *
-TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
@@ -362,7 +356,6 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
if (Kind.isReadOnly())
return ReadOnlySection;
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
return DataRelROSection;
}
@@ -507,7 +500,7 @@ emitModuleFlags(MCStreamer &Streamer,
// Get the section.
MCSectionMachO *S = getContext().getMachOSection(
- Segment, Section, TAA, StubSize, SectionKind::getDataNoRel());
+ Segment, Section, TAA, StubSize, SectionKind::getData());
Streamer.SwitchSection(S);
Streamer.EmitLabel(getContext().
getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
@@ -589,14 +582,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
@@ -634,12 +629,11 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
return DataSection;
}
-MCSection *
-TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
- if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
return ConstDataSection;
if (Kind.isMergeableConst4())
@@ -706,7 +700,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
- // Although MachO 32-bit targets do not explictly have a GOTPCREL relocation
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
// as 64-bit do, we replace the GOT equivalent by accessing the final symbol
// through a non_lazy_ptr stub instead. One advantage is that it allows the
// computation of deltas to final external symbols. Example:
@@ -740,7 +734,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
// non_lazy_ptr stubs.
SmallString<128> Name;
StringRef Suffix = "$non_lazy_ptr";
- Name += DL->getPrivateGlobalPrefix();
+ Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix();
Name += Sym->getName();
Name += Suffix;
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
@@ -763,6 +757,29 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
return MCBinaryExpr::createSub(LHS, RHS, Ctx);
}
+static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
+ const MCSection &Section) {
+ if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ return true;
+
+ // If it is not dead stripped, it is safe to use private labels.
+ const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return true;
+
+ return false;
+}
+
+void TargetLoweringObjectFileMachO::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+ const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM);
+ bool CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -918,7 +935,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
COMDATSymName, Selection);
} else {
SmallString<256> TmpData;
- getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
Selection);
}
@@ -943,8 +960,9 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
}
void TargetLoweringObjectFileCOFF::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ bool CannotUsePrivateLabel = false;
if (GV->hasPrivateLinkage() &&
((isa<Function>(GV) && TM.getFunctionSections()) ||
(isa<GlobalVariable>(GV) && TM.getDataSections())))
@@ -1043,7 +1061,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
raw_string_ostream FlagOS(Flag);
Mang.getNameWithPrefix(FlagOS, GV, false);
FlagOS.flush();
- if (Flag[0] == DL->getGlobalPrefix())
+ if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
OS << Flag.substr(1);
else
OS << Flag;
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 61a66b6..0a7042a 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,13 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "target-reg-info"
using namespace llvm;
@@ -34,54 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {}
-void PrintReg::print(raw_ostream &OS) const {
- if (!Reg)
- OS << "%noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
- else if (TRI && Reg < TRI->getNumRegs())
- OS << '%' << TRI->getName(Reg);
- else
- OS << "%physreg" << Reg;
- if (SubIdx) {
- if (TRI)
- OS << ':' << TRI->getSubRegIndexName(SubIdx);
+namespace llvm {
+
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
+ unsigned SubIdx) {
+ return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
else
- OS << ":sub(" << SubIdx << ')';
- }
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+ });
}
-void PrintRegUnit::print(raw_ostream &OS) const {
- // Generic printout when TRI is missing.
- if (!TRI) {
- OS << "Unit~" << Unit;
- return;
- }
+Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
- // Check for invalid register units.
- if (Unit >= TRI->getNumRegUnits()) {
- OS << "BadUnit~" << Unit;
- return;
- }
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
- // Normal units have at least one root.
- MCRegUnitRootIterator Roots(Unit, TRI);
- assert(Roots.isValid() && "Unit has no roots.");
- OS << TRI->getName(*Roots);
- for (++Roots; Roots.isValid(); ++Roots)
- OS << '~' << TRI->getName(*Roots);
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+ });
}
-void PrintVRegOrUnit::print(raw_ostream &OS) const {
- if (TRI && TRI->isVirtualRegister(Unit)) {
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
- return;
- }
- PrintRegUnit::print(OS);
+Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ if (TRI && TRI->isVirtualRegister(Unit)) {
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+ } else {
+ OS << PrintRegUnit(Unit, TRI);
+ }
+ });
+}
+
+Printable PrintLaneMask(LaneBitmask LaneMask) {
+ return Printable([LaneMask](raw_ostream &OS) {
+ OS << format("%08X", LaneMask);
+ });
}
+} // End of llvm namespace
+
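The rewrite above turns the PrintReg/PrintRegUnit/PrintVRegOrUnit helper classes into factory functions that return a Printable, i.e. a small object that captures a printing lambda and runs it when streamed. A self-contained sketch of that pattern using std::ostream and a made-up PrintableSketch type; llvm::Printable is analogous but works on raw_ostream.

#include <functional>
#include <iostream>
#include <utility>

// Minimal stand-in for llvm::Printable: wraps a deferred print callback.
class PrintableSketch {
  std::function<void(std::ostream &)> Print;
public:
  explicit PrintableSketch(std::function<void(std::ostream &)> P)
      : Print(std::move(P)) {}
  friend std::ostream &operator<<(std::ostream &OS, const PrintableSketch &P) {
    P.Print(OS);
    return OS;
  }
};

// Factory in the style of PrintReg: capture the arguments, print lazily.
PrintableSketch printReg(unsigned Reg) {
  return PrintableSketch([Reg](std::ostream &OS) {
    if (!Reg)
      OS << "%noreg";
    else
      OS << "%reg" << Reg;
  });
}

int main() {
  std::cout << "copy to " << printReg(7) << "\n"; // prints "copy to %reg7"
}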
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is allocatable, or NULL.
const TargetRegisterClass *
@@ -161,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
static inline
const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const uint32_t *B,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const MVT::SimpleValueType SVT =
+ MVT::SimpleValueType::Any) {
+ const MVT VT(SVT);
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + countTrailingZeros(Common));
+ if (unsigned Common = *A++ & *B++) {
+ const TargetRegisterClass *RC =
+ TRI->getRegClass(I + countTrailingZeros(Common));
+ if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT))
+ return RC;
+ }
return nullptr;
}
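firstCommonClass above scans the sub-class bit masks one 32-bit word at a time and, with this change, additionally rejects the candidate class when it cannot hold the requested value type. The scan itself is "take the lowest set bit of A & B in each word and test it". A standalone sketch, where an Accept predicate plays the role of RC->hasType(VT) and the GCC/Clang __builtin_ctz builtin replaces countTrailingZeros:

#include <cstdint>
#include <functional>

// Return the index of the lowest bit set in both masks (checking one
// candidate per 32-bit word, as the code above does) for which Accept()
// holds, or -1 if no word yields an accepted candidate.
static int firstCommonBit(const uint32_t *A, const uint32_t *B,
                          unsigned NumBits,
                          const std::function<bool(unsigned)> &Accept) {
  for (unsigned I = 0; I < NumBits; I += 32)
    if (uint32_t Common = *A++ & *B++) {
      unsigned Idx = I + __builtin_ctz(Common); // lowest common bit in word
      if (Accept(Idx))
        return static_cast<int>(Idx);
    }
  return -1;
}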
const TargetRegisterClass *
TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) const {
+ const TargetRegisterClass *B,
+ const MVT::SimpleValueType SVT) const {
// First take care of the trivial cases.
if (A == B)
return A;
@@ -179,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
// Register classes are ordered topologically, so the largest common
// sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT);
}
const TargetRegisterClass *
@@ -260,13 +291,55 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
return BestRC;
}
+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) {
+ // Same register class.
+ if (DefRC == SrcRC)
+ return true;
+
+ // Both operands are sub registers. Check if they share a register class.
+ unsigned SrcIdx, DefIdx;
+ if (SrcSubReg && DefSubReg) {
+ return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+ SrcIdx, DefIdx) != nullptr;
+ }
+
+ // At most one of the registers is a sub register; make it Src to avoid
+ // duplicating the test.
+ if (!SrcSubReg) {
+ std::swap(DefSubReg, SrcSubReg);
+ std::swap(DefRC, SrcRC);
+ }
+
+ // One of the registers is a sub register; check if we can get a superclass.
+ if (SrcSubReg)
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
+
+ // Plain copy.
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
+bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // If this source does not incur a cross register bank copy, use it.
+ return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
+}
+
// Compute target-independent register allocator hints to help eliminate copies.
void
TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const {
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
@@ -295,6 +368,26 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(Phys);
}
+bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+}
+
+bool TargetRegisterInfo::needsStackRealignment(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TFI->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
+ if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
+ if (canRealignStack(MF))
+ return true;
+ DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n");
+ }
+ return false;
+}
+
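The default needsStackRealignment added above boils down to: realignment is required when the frame needs more alignment than the ABI stack alignment guarantees or the function carries a stack-alignment attribute, it can be forced by the "stackrealign" attribute, and either way it is only honored when realignment is actually possible. A condensed sketch of that decision with plain values standing in for the frame-info and attribute queries:

#include <cstdio>

static bool needsStackRealignmentSketch(unsigned MaxFrameAlign,
                                        unsigned StackAlign,
                                        bool HasStackAlignAttr, // StackAlignment
                                        bool ForceRealign,      // "stackrealign"
                                        bool CanRealign) {      // !"no-realign-stack"
  bool Requires = MaxFrameAlign > StackAlign || HasStackAlignAttr;
  if (ForceRealign || Requires) {
    if (CanRealign)
      return true;
    std::fprintf(stderr, "Can't realign function's stack\n");
  }
  return false;
}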
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 299380d..fc65639 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -211,11 +211,9 @@ unsigned TargetSchedModel::computeOperandLatency(
if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
&& SchedModel.isComplete()) {
- std::string Err;
- raw_string_ostream ss(Err);
- ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
- << *DefMI;
- report_fatal_error(ss.str());
+ errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ llvm_unreachable("incomplete machine model");
}
#endif
// FIXME: Automatically giving all implicit defs defaultDefLatency is
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1e30821..c6bae24 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -83,21 +83,20 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// The current basic block being processed.
MachineBasicBlock *MBB;
- // DistanceMap - Keep track the distance of a MI from the start of the
- // current basic block.
+ // Keep track the distance of a MI from the start of the current basic block.
DenseMap<MachineInstr*, unsigned> DistanceMap;
// Set of already processed instructions in the current block.
SmallPtrSet<MachineInstr*, 8> Processed;
- // SrcRegMap - A map from virtual registers to physical registers which are
- // likely targets to be coalesced to due to copies from physical registers to
- // virtual registers. e.g. v1024 = move r0.
+ // A map from virtual registers to physical registers which are likely targets
+ // to be coalesced to due to copies from physical registers to virtual
+ // registers. e.g. v1024 = move r0.
DenseMap<unsigned, unsigned> SrcRegMap;
- // DstRegMap - A map from virtual registers to physical registers which are
- // likely targets to be coalesced to due to copies to physical registers from
- // virtual registers. e.g. r1 = move v1024.
+ // A map from virtual registers to physical registers which are likely targets
+ // to be coalesced to due to copies to physical registers from virtual
+ // registers. e.g. r1 = move v1024.
DenseMap<unsigned, unsigned> DstRegMap;
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
@@ -110,8 +109,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, unsigned Dist);
- bool commuteInstruction(MachineBasicBlock::iterator &mi,
- unsigned RegB, unsigned RegC, unsigned Dist);
+ bool commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
@@ -133,6 +132,11 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
unsigned SrcIdx, unsigned DstIdx,
unsigned Dist, bool shouldOnlyCommute);
+ bool tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist);
void scanUses(unsigned DstReg);
void processCopy(MachineInstr *MI);
@@ -151,7 +155,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -160,7 +164,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- /// runOnMachineFunction - Pass entry point.
+ /// Pass entry point.
bool runOnMachineFunction(MachineFunction&) override;
};
} // end anonymous namespace
@@ -168,7 +172,7 @@ public:
char TwoAddressInstructionPass::ID = 0;
INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
"Two-Address instruction pass", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
"Two-Address instruction pass", false, false)
@@ -176,10 +180,9 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
-/// sink3AddrInstruction - A two-address instruction has been converted to a
-/// three-address instruction to avoid clobbering a register. Try to sink it
-/// past the instruction that would kill the above mentioned register to reduce
-/// register pressure.
+/// A two-address instruction has been converted to a three-address instruction
+/// to avoid clobbering a register. Try to sink it past the instruction that
+/// would kill the above mentioned register to reduce register pressure.
bool TwoAddressInstructionPass::
sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
MachineBasicBlock::iterator OldPos) {
@@ -195,8 +198,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
unsigned DefReg = 0;
SmallSet<unsigned, 4> UseRegs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -231,10 +233,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
KillMI = LIS->getInstructionFromIndex(I->end);
}
if (!KillMI) {
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SavedReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = *UI;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) {
if (!UseMO.isKill())
continue;
KillMI = UseMO.getParent();
@@ -312,8 +311,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
-/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg
-/// in current BB.
+/// Return the MachineInstr* if it is the single def of the Reg in the current BB.
static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
MachineInstr *Ret = nullptr;
@@ -351,10 +349,10 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
return false;
}
-/// noUseAfterLastDef - Return true if there are no intervening uses between the
-/// last instruction in the MBB that defines the specified register and the
-/// two-address instruction which is being processed. It also returns the last
-/// def location by reference
+/// Return true if there are no intervening uses between the last instruction
+/// in the MBB that defines the specified register and the two-address
+/// instruction which is being processed. It also returns the last def location
+/// by reference.
bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
unsigned &LastDef) {
LastDef = 0;
@@ -375,9 +373,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
return !(LastUse > LastDef && LastUse < Dist);
}
-/// isCopyToReg - Return true if the specified MI is a copy instruction or
-/// a extract_subreg instruction. It also returns the source and destination
-/// registers and whether they are physical registers by reference.
+/// Return true if the specified MI is a copy instruction or an extract_subreg
+/// instruction. It also returns the source and destination registers and
+/// whether they are physical registers by reference.
static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
unsigned &SrcReg, unsigned &DstReg,
bool &IsSrcPhys, bool &IsDstPhys) {
@@ -397,8 +395,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
-/// isPLainlyKilled - Test if the given register value, which is used by the
-// given instruction, is killed by the given instruction.
+/// Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
LiveIntervals *LIS) {
if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
@@ -424,7 +422,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
return MI->killsRegister(Reg);
}
-/// isKilled - Test if the given register value, which is used by the given
+/// Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
///
@@ -472,8 +470,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
}
}
-/// isTwoAddrUse - Return true if the specified MI uses the specified register
-/// as a two-address use. If so, return the destination register by reference.
+/// Return true if the specified MI uses the specified register as a two-address
+/// use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -488,8 +486,8 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
-/// findOnlyInterestingUse - Given a register, if has a single in-basic block
-/// use, return the use instruction if it's a copy or a two-address use.
+/// Given a register, if it has a single in-basic-block use, return the use
+/// instruction if it's a copy or a two-address use.
static
MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
@@ -516,8 +514,8 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
return nullptr;
}
-/// getMappedReg - Return the physical register the specified virtual register
-/// might be mapped to.
+/// Return the physical register the specified virtual register might be mapped
+/// to.
static unsigned
getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
while (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -531,8 +529,7 @@ getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
return 0;
}
-/// regsAreCompatible - Return true if the two registers are equal or aliased.
-///
+/// Return true if the two registers are equal or aliased.
static bool
regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
if (RegA == RegB)
@@ -543,8 +540,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToCommute - Return true if it's potentially profitable to commute
-/// the two-address instruction that's being processed.
+/// Return true if it's potentially profitable to commute the two-address
+/// instruction that's being processed.
bool
TwoAddressInstructionPass::
isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -642,15 +639,15 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
return LastDefB && LastDefC && LastDefC > LastDefB;
}
-/// commuteInstruction - Commute a two-address instruction and update the basic
-/// block, distance map, and live variables if needed. Return true if it is
-/// successful.
-bool TwoAddressInstructionPass::
-commuteInstruction(MachineBasicBlock::iterator &mi,
- unsigned RegB, unsigned RegC, unsigned Dist) {
- MachineInstr *MI = mi;
+/// Commute a two-address instruction and update the basic block, distance map,
+/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx,
+ unsigned RegCIdx,
+ unsigned Dist) {
+ unsigned RegC = MI->getOperand(RegCIdx).getReg();
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
- MachineInstr *NewMI = TII->commuteInstruction(MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx);
if (NewMI == nullptr) {
DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
@@ -672,8 +669,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
return true;
}
-/// isProfitableToConv3Addr - Return true if it is profitable to convert the
-/// given 2-address instruction to a 3-address one.
+/// Return true if it is profitable to convert the given 2-address instruction
+/// to a 3-address one.
bool
TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
// Look for situations like this:
@@ -689,17 +686,18 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
-/// convertInstTo3Addr - Convert the specified two-address instruction into a
-/// three address one. Return true if this transformation was successful.
+/// Convert the specified two-address instruction into a three address one.
+/// Return true if this transformation was successful.
bool
TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned RegA, unsigned RegB,
unsigned Dist) {
// FIXME: Why does convertToThreeAddress() need an iterator reference?
- MachineFunction::iterator MFI = MBB;
+ MachineFunction::iterator MFI = MBB->getIterator();
MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
- assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ assert(MBB->getIterator() == MFI &&
+ "convertToThreeAddress changed iterator reference");
if (!NewMI)
return false;
@@ -730,8 +728,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
return true;
}
-/// scanUses - Scan forward recursively for only uses, update maps if the use
-/// is a copy or a two-address instruction.
+/// Scan forward recursively for only uses, update maps if the use is a copy or
+/// a two-address instruction.
void
TwoAddressInstructionPass::scanUses(unsigned DstReg) {
SmallVector<unsigned, 4> VirtRegPairs;
@@ -777,8 +775,8 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) {
}
}
-/// processCopy - If the specified instruction is not yet processed, process it
-/// if it's a copy. For a copy instruction, we find the physical registers the
+/// If the specified instruction is not yet processed, process it if it's a
+/// copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
/// point-to maps used to determine future optimizations. e.g.
/// v1024 = mov r0
@@ -813,9 +811,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
return;
}
-/// rescheduleMIBelowKill - If there is one more local instruction that reads
-/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
-/// instruction in order to eliminate the need for the copy.
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the instruction below the kill instruction in order to
+/// eliminate the need for the copy.
bool TwoAddressInstructionPass::
rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -871,8 +869,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
SmallSet<unsigned, 2> Uses;
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -914,8 +911,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
OtherMI->isBranch() || OtherMI->isTerminator())
// Don't move past calls, etc.
return false;
- for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OtherMI->getOperand(i);
+ for (const MachineOperand &MO : OtherMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -984,8 +980,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
return true;
}
-/// isDefTooClose - Return true if the re-scheduling will put the given
-/// instruction too close to the defs of its register dependencies.
+/// Return true if the re-scheduling will put the given instruction too close
+/// to the defs of its register dependencies.
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
@@ -1004,10 +1000,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return false;
}
-/// rescheduleKillAboveMI - If there is one more local instruction that reads
-/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the
-/// current two-address instruction in order to eliminate the need for the
-/// copy.
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the kill instruction above the current two-address
+/// instruction in order to eliminate the need for the copy.
bool TwoAddressInstructionPass::
rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -1055,8 +1050,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
SmallSet<unsigned, 2> LiveDefs;
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = KillMI->getOperand(i);
+ for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -1094,8 +1088,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Don't move past calls, etc.
return false;
SmallVector<unsigned, 2> OtherDefs;
- for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OtherMI->getOperand(i);
+ for (const MachineOperand &MO : OtherMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -1155,13 +1148,68 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
return true;
}
-/// tryInstructionTransform - For the case where an instruction has a single
-/// pair of tied register operands, attempt some transformations that may
-/// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if no copy needs to be
-/// inserted to untie mi's operands (either because they were untied, or
-/// because mi was rescheduled, and will be visited again later). If the
-/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the
+/// given machine instruction to improve opportunities for coalescing and
+/// elimination of a register to register copy.
+///
+/// 'DstOpIdx' specifies the index of the MI def operand.
+/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx'
+/// operand is killed by the given instruction.
+/// The 'Dist' argument provides the distance of MI from the start of the
+/// current basic block and it is used to determine if it is profitable
+/// to commute operands in the instruction.
+///
+/// Returns true if the transformation happened. Otherwise, returns false.
+bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist) {
+ unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ unsigned OpsNum = MI->getDesc().getNumOperands();
+ unsigned OtherOpIdx = MI->getDesc().getNumDefs();
+ for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
+ // The call of findCommutedOpIndices below only checks if BaseOpIdx
+ // and OtherOpIdx are commutable; it does not really search for
+ // other commutable operands and does not change the values of the passed
+ // variables.
+ if (OtherOpIdx == BaseOpIdx ||
+ !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx))
+ continue;
+
+ unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ bool AggressiveCommute = false;
+
+ // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
+ // operands. This makes the live ranges of DstOp and OtherOp joinable.
+ bool DoCommute =
+ !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+
+ if (!DoCommute &&
+ isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
+ DoCommute = true;
+ AggressiveCommute = true;
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return true;
+ }
+ }
+ return false;
+}
+
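tryInstructionCommute above scans the non-def operands for one that the target reports as commutable with the tied base operand, preferring a commute when the other operand already dies (or when the heuristic says it pays off). The control flow, detached from MachineInstr, looks roughly like the sketch below; canCommute, dies, profitable and commute are placeholders for the TargetInstrInfo and liveness queries, not real APIs.

#include <functional>

struct CommuteQueries {
  std::function<bool(unsigned, unsigned)> canCommute; // findCommutedOpIndices
  std::function<bool(unsigned)> dies;                 // isKilled on the operand
  std::function<bool(unsigned)> profitable;           // isProfitableToCommute
  std::function<bool(unsigned, unsigned)> commute;    // commuteInstruction
};

// Returns true if some operand was commuted with BaseOpIdx.
static bool tryCommuteSketch(unsigned NumDefs, unsigned NumOps,
                             unsigned BaseOpIdx, bool BaseOpKilled,
                             const CommuteQueries &Q) {
  for (unsigned Other = NumDefs; Other < NumOps; ++Other) {
    if (Other == BaseOpIdx || !Q.canCommute(BaseOpIdx, Other))
      continue;
    // Commute if the other operand dies here while the base does not,
    // or if the target-independent heuristic says it is worthwhile.
    bool DoCommute = (!BaseOpKilled && Q.dies(Other)) || Q.profitable(Other);
    if (DoCommute && Q.commute(BaseOpIdx, Other))
      return true;
  }
  return false;
}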
+/// For the case where an instruction has a single pair of tied register
+/// operands, attempt some transformations that may either eliminate the tied
+/// operands or improve the opportunities for coalescing away the register copy.
+/// Returns true if no copy needs to be inserted to untie mi's operands
+/// (either because they were untied, or because mi was rescheduled, and will
+/// be visited again later). If the shouldOnlyCommute flag is true, only
+/// instruction commutation is attempted.
bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -1181,51 +1229,18 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (TargetRegisterInfo::isVirtualRegister(regA))
scanUses(regA);
- // Check if it is profitable to commute the operands.
- unsigned SrcOp1, SrcOp2;
- unsigned regC = 0;
- unsigned regCIdx = ~0U;
- bool TryCommute = false;
- bool AggressiveCommute = false;
- if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
- TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
- if (SrcIdx == SrcOp1)
- regCIdx = SrcOp2;
- else if (SrcIdx == SrcOp2)
- regCIdx = SrcOp1;
-
- if (regCIdx != ~0U) {
- regC = MI.getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
- // If C dies but B does not, swap the B and C operands.
- // This makes the live ranges of A and C joinable.
- TryCommute = true;
- else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
- TryCommute = true;
- AggressiveCommute = true;
- }
- }
- }
+ bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
// If the instruction is convertible to 3 Addr, instead
// of returning try the 3 Addr transformation aggressively and
// use this variable to check later, because it might be better.
// For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
// instead of the following code.
- // addl %esi, %edi
- // movl %edi, %eax
+ // addl %esi, %edi
+ // movl %edi, %eax
// ret
- bool Commuted = false;
-
- // If it's profitable to commute, try to do so.
- if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
- Commuted = true;
- ++NumCommuted;
- if (AggressiveCommute)
- ++NumAggrCommuted;
- if (!MI.isConvertibleTo3Addr())
- return false;
- }
+ if (Commuted && !MI.isConvertibleTo3Addr())
+ return false;
if (shouldOnlyCommute)
return false;
@@ -1237,6 +1252,13 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return true;
}
+ // If we commuted, regB may have changed so we should re-sample it to avoid
+ // confusing the three address conversion below.
+ if (Commuted) {
+ regB = MI.getOperand(SrcIdx).getReg();
+ regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ }
+
if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
@@ -1348,10 +1370,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
SmallVector<unsigned, 4> OrigRegs;
if (LIS) {
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg())
- OrigRegs.push_back(MOI->getReg());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg())
+ OrigRegs.push_back(MO.getReg());
}
}
@@ -1536,12 +1557,10 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
SrcRegMap[RegA] = RegB;
}
-
if (AllUsesCopied) {
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
MO.isUse()) {
if (MO.isKill()) {
@@ -1578,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// regB is still used in this instruction, but a kill flag was
// removed from a different tied use of regB, so now we need to add
// a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
MO.setIsKill(true);
break;
@@ -1588,8 +1606,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
}
-/// runOnMachineFunction - Reduce two-address instructions to two operands.
-///
+/// Reduce two-address instructions to two operands.
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
@@ -1599,7 +1616,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
OptLevel = TM.getOptLevel();
bool MadeChange = false;
@@ -1614,7 +1631,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
TiedOperandMap TiedOperands;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
- MBB = MBBI;
+ MBB = &*MBBI;
unsigned Dist = 0;
DistanceMap.clear();
SrcRegMap.clear();
@@ -1661,8 +1678,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
- // The tied operands have been eliminated or shifted further down the
- // block to ease elimination. Continue processing with 'nmi'.
+ // The tied operands have been eliminated or shifted further down
+ // the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
mi = nmi;
continue;
@@ -1671,9 +1688,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
// Now iterate over the information collected above.
- for (TiedOperandMap::iterator OI = TiedOperands.begin(),
- OE = TiedOperands.end(); OI != OE; ++OI) {
- processTiedPairs(mi, OI->second, Dist);
+ for (auto &TO : TiedOperands) {
+ processTiedPairs(mi, TO.second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index d393e10..8c9631e 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -71,8 +71,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
// in them.
std::vector<BasicBlock*> DeadBlocks;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(I)) {
- BasicBlock *BB = I;
+ if (!Reachable.count(&*I)) {
+ BasicBlock *BB = &*I;
DeadBlocks.push_back(BB);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
@@ -131,7 +131,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// in them.
std::vector<MachineBasicBlock*> DeadBlocks;
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
+ MachineBasicBlock *BB = &*I;
// Test for deadness.
if (!Reachable.count(BB)) {
@@ -167,7 +167,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// Cleanup PHI nodes.
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
+ MachineBasicBlock *BB = &*I;
// Prune unneeded PHI entries.
SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
BB->pred_end());
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 2912bdd..bf1c0dc 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -163,11 +163,12 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
- SparseSet<unsigned> PhysRegs;
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+
public:
static char ID;
VirtRegRewriter() : MachineFunctionPass(ID) {}
@@ -237,10 +238,52 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
return true;
}
+void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
+ unsigned PhysReg) const {
+ assert(!LI.empty());
+ assert(LI.hasSubRanges());
+
+ typedef std::pair<const LiveInterval::SubRange *,
+ LiveInterval::const_iterator> SubRangeIteratorPair;
+ SmallVector<SubRangeIteratorPair, 4> SubRanges;
+ SlotIndex First;
+ SlotIndex Last;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRanges.push_back(std::make_pair(&SR, SR.begin()));
+ if (!First.isValid() || SR.segments.front().start < First)
+ First = SR.segments.front().start;
+ if (!Last.isValid() || SR.segments.back().end > Last)
+ Last = SR.segments.back().end;
+ }
+
+ // Check all mbb start positions between First and Last while
+ // simultaneously advancing an iterator for each subrange.
+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First);
+ MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) {
+ SlotIndex MBBBegin = MBBI->first;
+ // Advance all subrange iterators so that their end position is just
+ // behind MBBBegin (or the iterator is at the end).
+ LaneBitmask LaneMask = 0;
+ for (auto &RangeIterPair : SubRanges) {
+ const LiveInterval::SubRange *SR = RangeIterPair.first;
+ LiveInterval::const_iterator &SRI = RangeIterPair.second;
+ while (SRI != SR->end() && SRI->end <= MBBBegin)
+ ++SRI;
+ if (SRI == SR->end())
+ continue;
+ if (SRI->start <= MBBBegin)
+ LaneMask |= SR->LaneMask;
+ }
+ if (LaneMask == 0)
+ continue;
+ MachineBasicBlock *MBB = MBBI->second;
+ MBB->addLiveIn(PhysReg, LaneMask);
+ }
+}
+
// Compute MBB live-in lists from virtual register live ranges and their
// assignments.
void VirtRegRewriter::addMBBLiveIns() {
- SmallVector<MachineBasicBlock*, 16> LiveIn;
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
if (MRI->reg_nodbg_empty(VirtReg))
@@ -254,31 +297,18 @@ void VirtRegRewriter::addMBBLiveIns() {
assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
if (LI.hasSubRanges()) {
- for (LiveInterval::SubRange &S : LI.subranges()) {
- for (const auto &Seg : S.segments) {
- if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
- continue;
- for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) {
- unsigned SubReg = SR.getSubReg();
- unsigned SubRegIndex = SR.getSubRegIndex();
- unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
- if ((SubRegLaneMask & S.LaneMask) == 0)
- continue;
- for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
- LiveIn[i]->addLiveIn(SubReg);
- }
- }
- LiveIn.clear();
- }
- }
+ addLiveInsForSubRanges(LI, PhysReg);
} else {
- // Scan the segments of LI.
- for (const auto &Seg : LI.segments) {
- if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
- continue;
- for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
- LiveIn[i]->addLiveIn(PhysReg);
- LiveIn.clear();
+ // Go over MBB begin positions and see if we have segments covering them.
+ // The following works because segments and the MBBIndex list are both
+ // sorted by slot indexes.
+ SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin();
+ for (const auto &Seg : LI) {
+ I = Indexes->advanceMBBIndex(I, Seg.start);
+ for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) {
+ MachineBasicBlock *MBB = I->second;
+ MBB->addLiveIn(PhysReg);
+ }
}
}
}
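The rewritten addMBBLiveIns above exploits the fact that both the live segments and the slot-index list of block starts are sorted, so a single iterator can be advanced across block starts while walking the segments, instead of collecting live-in blocks per segment. The same merge-style walk over two sorted sequences in plain C++, with integers standing in for SlotIndex and a result vector standing in for MBB::addLiveIn:

#include <algorithm>
#include <utility>
#include <vector>

struct Segment { int Start, End; };            // half-open [Start, End)

// BlockStarts: sorted (start index, block id) pairs, one per basic block.
// Segments:    sorted live segments of the interval.
// Returns the ids of blocks whose start index lies inside some segment.
static std::vector<int>
liveInBlocks(const std::vector<std::pair<int, int>> &BlockStarts,
             const std::vector<Segment> &Segments) {
  std::vector<int> LiveIn;
  auto I = BlockStarts.begin();
  for (const Segment &Seg : Segments) {
    // Advance to the first block that starts at or after Seg.Start.
    I = std::lower_bound(I, BlockStarts.end(), std::make_pair(Seg.Start, 0),
                         [](const std::pair<int, int> &A,
                            const std::pair<int, int> &B) {
                           return A.first < B.first;
                         });
    for (; I != BlockStarts.end() && I->first < Seg.End; ++I)
      LiveIn.push_back(I->second);             // block starts inside Seg
  }
  return LiveIn;
}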
@@ -305,7 +335,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
assert(LI.liveAt(BaseIndex) &&
"Reads of completely dead register should be marked undef already");
unsigned SubRegIdx = MO.getSubReg();
- unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
// See if any of the relevant subregister liveranges is defined at this point.
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
@@ -319,54 +349,15 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
- SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
-
- // Here we have a SparseSet to hold which PhysRegs are actually encountered
- // in the MF we are about to iterate over so that later when we call
- // setPhysRegUsed, we are only doing it for physRegs that were actually found
- // in the program and not for all of the possible physRegs for the given
- // target architecture. If the target has a lot of physRegs, then for a small
- // program there will be a significant compile time reduction here.
- PhysRegs.clear();
- PhysRegs.setUniverse(TRI->getNumRegs());
-
- // The function with uwtable should guarantee that the stack unwinder
- // can unwind the stack to the previous frame. Thus, we can't apply the
- // noreturn optimization if the caller function has uwtable attribute.
- bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
- MachineInstr *MI = MII;
+ MachineInstr *MI = &*MII;
++MII;
- // Check if this instruction is a call to a noreturn function. If this
- // is a call to noreturn function and we don't need the stack unwinding
- // functionality (i.e. this function does not have uwtable attribute and
- // the callee function has the nounwind attribute), then we can ignore
- // the definitions set by this instruction.
- if (!HasUWTable && IsExitBB && MI->isCall()) {
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
- if (!MO.isGlobal())
- continue;
- const Function *Func = dyn_cast<Function>(MO.getGlobal());
- if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
- // We need to keep correct unwind information
- // even if the function will not return, since the
- // runtime may need it.
- !Func->hasFnAttribute(Attribute::NoUnwind))
- continue;
- NoReturnInsts.insert(MI);
- break;
- }
- }
-
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@@ -375,15 +366,6 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- // If we encounter a VirtReg or PhysReg then get at the PhysReg and add
- // it to the physreg bitset. Later we use only the PhysRegs that were
- // actually encountered in the MF to populate the MRI's used physregs.
- if (MO.isReg() && MO.getReg())
- PhysRegs.insert(
- TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
- VRM->getPhys(MO.getReg()) :
- MO.getReg());
-
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
@@ -418,14 +400,6 @@ void VirtRegRewriter::rewrite() {
MO.setIsUndef(true);
} else if (!MO.isDead()) {
assert(MO.isDef());
- // Things get tricky when we ran out of lane mask bits and
- // merged multiple lanes into the overflow bit: In this case
- // our subregister liveness tracking isn't precise and we can't
- // know what subregister parts are undefined, fall back to the
- // implicit super-register def then.
- unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
- if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask))
- SuperDefs.push_back(PhysReg);
}
}
@@ -470,29 +444,5 @@ void VirtRegRewriter::rewrite() {
}
}
}
-
- // Tell MRI about physical registers in use.
- if (NoReturnInsts.empty()) {
- for (SparseSet<unsigned>::iterator
- RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
- if (!MRI->reg_nodbg_empty(*RegI))
- MRI->setPhysRegUsed(*RegI);
- } else {
- for (SparseSet<unsigned>::iterator
- I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
- unsigned Reg = *I;
- if (MRI->reg_nodbg_empty(Reg))
- continue;
- // Check if this register has a use that will impact the rest of the
- // code. Uses in debug and noreturn instructions do not impact the
- // generated code.
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
- if (!NoReturnInsts.count(&It)) {
- MRI->setPhysRegUsed(Reg);
- break;
- }
- }
- }
- }
}
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 0d26ed3..52fb922 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -18,66 +18,40 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include <memory>
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "winehprepare"
-namespace {
-
-// This map is used to model frame variable usage during outlining, to
-// construct a structure type to hold the frame variables in a frame
-// allocation block, and to remap the frame variable allocas (including
-// spill locations as needed) to GEPs that get the variable from the
-// frame allocation structure.
-typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-
-// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't
-// quite null.
-AllocaInst *getCatchObjectSentinel() {
- return static_cast<AllocaInst *>(nullptr) + 1;
-}
-
-typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+static cl::opt<bool> DisableDemotion(
+ "disable-demotion", cl::Hidden,
+ cl::desc(
+ "Clone multicolor basic blocks but do not demote cross funclet values"),
+ cl::init(false));
-class LandingPadActions;
-class LandingPadMap;
-
-typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
-typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+static cl::opt<bool> DisableCleanups(
+ "disable-cleanups", cl::Hidden,
+ cl::desc("Do not remove implausible terminators or other similar cleanups"),
+ cl::init(false));
+namespace {
+
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID) {
- if (TM)
- TheTriple = TM->getTargetTriple();
- }
+ WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -90,264 +64,27 @@ public:
}
private:
- bool prepareExceptionHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads);
- void promoteLandingPadValues(LandingPadInst *LPad);
- void demoteValuesLiveAcrossHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- void findSEHEHReturnPoints(Function &F,
- SetVector<BasicBlock *> &EHReturnBlocks);
- void findCXXEHReturnPoints(Function &F,
- SetVector<BasicBlock *> &EHReturnBlocks);
- void getPossibleReturnTargets(Function *ParentF, Function *HandlerF,
- SetVector<BasicBlock*> &Targets);
- void completeNestedLandingPad(Function *ParentFn,
- LandingPadInst *OutlinedLPad,
- const LandingPadInst *OriginalLPad,
- FrameVarInfoMap &VarInfo);
- Function *createHandlerFunc(Function *ParentFn, Type *RetTy,
- const Twine &Name, Module *M, Value *&ParentFP);
- bool outlineHandler(ActionHandler *Action, Function *SrcFn,
- LandingPadInst *LPad, BasicBlock *StartBB,
- FrameVarInfoMap &VarInfo);
- void addStubInvokeToHandlerIfNeeded(Function *Handler);
-
- void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
- CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
- VisitedBlockSet &VisitedBlocks);
- void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB,
- BasicBlock *EndBB);
-
- void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
-
- Triple TheTriple;
+ void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot);
+ void
+ insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist);
+ AllocaInst *insertPHILoads(PHINode *PN, Function &F);
+ void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads, Function &F);
+ bool prepareExplicitEH(Function &F);
+ void colorFunclets(Function &F);
+
+ void demotePHIsOnFunclets(Function &F);
+ void cloneCommonBlocks(Function &F);
+ void removeImplausibleInstructions(Function &F);
+ void cleanupPreparedFunclets(Function &F);
+ void verifyPreparedFunclets(Function &F);
// All fields are reset by runOnFunction.
- DominatorTree *DT = nullptr;
- const TargetLibraryInfo *LibInfo = nullptr;
EHPersonality Personality = EHPersonality::Unknown;
- CatchHandlerMapTy CatchHandlerMap;
- CleanupHandlerMapTy CleanupHandlerMap;
- DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
- SmallPtrSet<BasicBlock *, 4> NormalBlocks;
- SmallPtrSet<BasicBlock *, 4> EHBlocks;
- SetVector<BasicBlock *> EHReturnBlocks;
-
- // This maps landing pad instructions found in outlined handlers to
- // the landing pad instruction in the parent function from which they
- // were cloned. The cloned/nested landing pad is used as the key
- // because the landing pad may be cloned into multiple handlers.
- // This map will be used to add the llvm.eh.actions call to the nested
- // landing pads after all handlers have been outlined.
- DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP;
-
- // This maps blocks in the parent function which are destinations of
- // catch handlers to cloned blocks in (other) outlined handlers. This
- // handles the case where a nested landing pads has a catch handler that
- // returns to a handler function rather than the parent function.
- // The original block is used as the key here because there should only
- // ever be one handler function from which the cloned block is not pruned.
- // The original block will be pruned from the parent function after all
- // handlers have been outlined. This map will be used to adjust the
- // return instructions of handlers which return to the block that was
- // outlined into a handler. This is done after all handlers have been
- // outlined but before the outlined code is pruned from the parent function.
- DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks;
-
- // Map from outlined handler to call to parent local address. Only used for
- // 32-bit EH.
- DenseMap<Function *, Value *> HandlerToParentFP;
-
- AllocaInst *SEHExceptionCodeSlot = nullptr;
-};
-
-class WinEHFrameVariableMaterializer : public ValueMaterializer {
-public:
- WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP,
- FrameVarInfoMap &FrameVarInfo);
- ~WinEHFrameVariableMaterializer() override {}
-
- Value *materializeValueFor(Value *V) override;
-
- void escapeCatchObject(Value *V);
-
-private:
- FrameVarInfoMap &FrameVarInfo;
- IRBuilder<> Builder;
-};
-
-class LandingPadMap {
-public:
- LandingPadMap() : OriginLPad(nullptr) {}
- void mapLandingPad(const LandingPadInst *LPad);
-
- bool isInitialized() { return OriginLPad != nullptr; }
-
- bool isOriginLandingPadBlock(const BasicBlock *BB) const;
- bool isLandingPadSpecificInst(const Instruction *Inst) const;
-
- void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
- Value *SelectorValue) const;
-
-private:
- const LandingPadInst *OriginLPad;
- // We will normally only see one of each of these instructions, but
- // if more than one occurs for some reason we can handle that.
- TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
- TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
-};
-
-class WinEHCloningDirectorBase : public CloningDirector {
-public:
- WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
- : Materializer(HandlerFn, ParentFP, VarInfo),
- SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
- Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())),
- LPadMap(LPadMap), ParentFP(ParentFP) {}
-
- CloningAction handleInstruction(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
-
- virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleInvoke(ValueToValueMapTy &VMap,
- const InvokeInst *Invoke,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleResume(ValueToValueMapTy &VMap,
- const ResumeInst *Resume,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) = 0;
-
- ValueMaterializer *getValueMaterializer() override { return &Materializer; }
-
-protected:
- WinEHFrameVariableMaterializer Materializer;
- Type *SelectorIDType;
- Type *Int8PtrType;
- LandingPadMap &LPadMap;
-
- /// The value representing the parent frame pointer.
- Value *ParentFP;
-};
-
-class WinEHCatchDirector : public WinEHCloningDirectorBase {
-public:
- WinEHCatchDirector(
- Function *CatchFn, Value *ParentFP, Value *Selector,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap,
- DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads,
- DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks)
- : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap),
- CurrentSelector(Selector->stripPointerCasts()),
- ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads),
- DT(DT), EHBlocks(EHBlocks) {}
-
- CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) override;
- CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
- BasicBlock *NewBB) override;
- CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
- BasicBlock *NewBB) override;
- CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
- BasicBlock *NewBB) override;
- CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) override;
-
- Value *getExceptionVar() { return ExceptionObjectVar; }
- TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
-private:
- Value *CurrentSelector;
-
- Value *ExceptionObjectVar;
- TinyPtrVector<BasicBlock *> ReturnTargets;
- // This will be a reference to the field of the same name in the WinEHPrepare
- // object which instantiates this WinEHCatchDirector object.
- DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP;
- DominatorTree *DT;
- SmallPtrSetImpl<BasicBlock *> &EHBlocks;
-};
-
-class WinEHCleanupDirector : public WinEHCloningDirectorBase {
-public:
- WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
- : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo,
- LPadMap) {}
-
- CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) override;
- CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
- BasicBlock *NewBB) override;
- CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
- BasicBlock *NewBB) override;
- CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
- BasicBlock *NewBB) override;
- CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) override;
-};
-
-class LandingPadActions {
-public:
- LandingPadActions() : HasCleanupHandlers(false) {}
-
- void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); }
- void insertCleanupHandler(CleanupHandler *Action) {
- Actions.push_back(Action);
- HasCleanupHandlers = true;
- }
-
- bool includesCleanup() const { return HasCleanupHandlers; }
-
- SmallVectorImpl<ActionHandler *> &actions() { return Actions; }
- SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
- SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
-
-private:
- // Note that this class does not own the ActionHandler objects in this vector.
- // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap
- // in the WinEHPrepare class.
- SmallVector<ActionHandler *, 4> Actions;
- bool HasCleanupHandlers;
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+ MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
} // end anonymous namespace
@@ -361,2536 +98,987 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
}
bool WinEHPrepare::runOnFunction(Function &Fn) {
- // No need to prepare outlined handlers.
- if (Fn.hasFnAttribute("wineh-parent"))
- return false;
-
- SmallVector<LandingPadInst *, 4> LPads;
- SmallVector<ResumeInst *, 4> Resumes;
- for (BasicBlock &BB : Fn) {
- if (auto *LP = BB.getLandingPadInst())
- LPads.push_back(LP);
- if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator()))
- Resumes.push_back(Resume);
- }
-
- // No need to prepare functions that lack landing pads.
- if (LPads.empty())
+ if (!Fn.hasPersonalityFn())
return false;
// Classify the personality to see what kind of preparation we need.
Personality = classifyEHPersonality(Fn.getPersonalityFn());
- // Do nothing if this is not an MSVC personality.
- if (!isMSVCEHPersonality(Personality))
+ // Do nothing if this is not a funclet-based personality.
+ if (!isFuncletEHPersonality(Personality))
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- // If there were any landing pads, prepareExceptionHandlers will make changes.
- prepareExceptionHandlers(Fn, LPads);
- return true;
+ return prepareExplicitEH(Fn);
}
bool WinEHPrepare::doFinalization(Module &M) { return false; }
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
- Constant *&Selector, BasicBlock *&NextBB);
-
-// Finds blocks reachable from the starting set Worklist. Does not follow unwind
-// edges or blocks listed in StopPoints.
-static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs,
- SetVector<BasicBlock *> &Worklist,
- const SetVector<BasicBlock *> *StopPoints) {
- while (!Worklist.empty()) {
- BasicBlock *BB = Worklist.pop_back_val();
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
- // Don't cross blocks that we should stop at.
- if (StopPoints && StopPoints->count(BB))
- continue;
-
- if (!ReachableBBs.insert(BB).second)
- continue; // Already visited.
-
- // Don't follow unwind edges of invokes.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Worklist.insert(II->getNormalDest());
- continue;
- }
-
- // Otherwise, follow all successors.
- Worklist.insert(succ_begin(BB), succ_end(BB));
- }
-}
-
-// Attempt to find an instruction where a block can be split before
-// a call to llvm.eh.begincatch and its operands. If the block
-// begins with the begincatch call or one of its adjacent operands
-// the block will not be split.
-static Instruction *findBeginCatchSplitPoint(BasicBlock *BB,
- IntrinsicInst *II) {
- // If the begincatch call is already the first instruction in the block,
- // don't split.
- Instruction *FirstNonPHI = BB->getFirstNonPHI();
- if (II == FirstNonPHI)
- return nullptr;
-
- // If either operand is in the same basic block as the instruction and
- // isn't used by another instruction before the begincatch call, include it
- // in the split block.
- auto *Op0 = dyn_cast<Instruction>(II->getOperand(0));
- auto *Op1 = dyn_cast<Instruction>(II->getOperand(1));
-
- Instruction *I = II->getPrevNode();
- Instruction *LastI = II;
-
- while (I == Op0 || I == Op1) {
- // If the block begins with one of the operands and there are no other
- // instructions between the operand and the begincatch call, don't split.
- if (I == FirstNonPHI)
- return nullptr;
-
- LastI = I;
- I = I->getPrevNode();
- }
-
- // If there is at least one instruction in the block before the begincatch
- // call and its operands, split the block at either the begincatch or
- // its operand.
- return LastI;
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+ const BasicBlock *BB) {
+ CxxUnwindMapEntry UME;
+ UME.ToState = ToState;
+ UME.Cleanup = BB;
+ FuncInfo.CxxUnwindMap.push_back(UME);
+ return FuncInfo.getLastStateNumber();
}
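
A note on the helper above: the state number it hands back is simply the position of the entry just appended, so the unwind map doubles as the state table. A self-contained sketch of that convention, with hypothetical names standing in for WinEHFuncInfo:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a C++ EH unwind-map entry.
struct UnwindEntry {
  int ToState;          // state reached when unwinding past this one (-1 = caller)
  const void *Cleanup;  // optional cleanup block
};

// Appending an entry defines a new state; its number is its index in the map.
static int addEntry(std::vector<UnwindEntry> &Map, int ToState, const void *BB) {
  Map.push_back({ToState, BB});
  return static_cast<int>(Map.size()) - 1;
}

int main() {
  std::vector<UnwindEntry> Map;
  int Outer = addEntry(Map, /*ToState=*/-1, nullptr); // top level, unwinds to caller
  int Inner = addEntry(Map, Outer, nullptr);          // nested region
  std::printf("outer=%d inner=%d entries=%zu\n", Outer, Inner, Map.size());
  return 0;
}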
-/// Find all points where exceptional control rejoins normal control flow via
-/// llvm.eh.endcatch. Add them to the normal bb reachability worklist.
-void WinEHPrepare::findCXXEHReturnPoints(
- Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
- for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- for (Instruction &I : *BB) {
- if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) {
- Instruction *SplitPt =
- findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I));
- if (SplitPt) {
- // Split the block before the llvm.eh.begincatch call to allow
- // cleanup and catch code to be distinguished later.
- // Do not update BBI because we still need to process the
- // portion of the block that we are splitting off.
- SplitBlock(BB, SplitPt, DT);
- break;
- }
- }
- if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) {
- // Split the block after the call to llvm.eh.endcatch if there is
- // anything other than an unconditional branch, or if the successor
- // starts with a phi.
- auto *Br = dyn_cast<BranchInst>(I.getNextNode());
- if (!Br || !Br->isUnconditional() ||
- isa<PHINode>(Br->getSuccessor(0)->begin())) {
- DEBUG(dbgs() << "splitting block " << BB->getName()
- << " with llvm.eh.endcatch\n");
- BBI = SplitBlock(BB, I.getNextNode(), DT);
- }
- // The next BB is normal control flow.
- EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0));
- break;
- }
- }
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+ int TryHigh, int CatchHigh,
+ ArrayRef<const CatchPadInst *> Handlers) {
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ TBME.CatchHigh = CatchHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (const CatchPadInst *CPI : Handlers) {
+ WinEHHandlerType HT;
+ Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+ if (TypeInfo->isNullValue())
+ HT.TypeDescriptor = nullptr;
+ else
+ HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+ HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+ HT.Handler = CPI->getParent();
+ if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
+ HT.CatchObj.Alloca = nullptr;
+ else
+ HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
+ TBME.HandlerArray.push_back(HT);
}
+ FuncInfo.TryBlockMap.push_back(TBME);
}
-static bool isCatchAllLandingPad(const BasicBlock *BB) {
- const LandingPadInst *LP = BB->getLandingPadInst();
- if (!LP)
- return false;
- unsigned N = LP->getNumClauses();
- return (N > 0 && LP->isCatch(N - 1) &&
- isa<ConstantPointerNull>(LP->getClause(N - 1)));
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+ for (const User *U : CleanupPad->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ return CRI->getUnwindDest();
+ return nullptr;
}
-/// Find all points where exceptions control rejoins normal control flow via
-/// selector dispatch.
-void WinEHPrepare::findSEHEHReturnPoints(
- Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
- for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- // If the landingpad is a catch-all, treat the whole lpad as if it is
- // reachable from normal control flow.
- // FIXME: This is imprecise. We need a better way of identifying where a
- // catch-all starts and cleanups stop. As far as LLVM is concerned, there
- // is no difference.
- if (isCatchAllLandingPad(BB)) {
- EHReturnBlocks.insert(BB);
+static void calculateStateNumbersForInvokes(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ auto *F = const_cast<Function *>(Fn);
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+ for (BasicBlock &BB : *F) {
+ auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
continue;
- }
-
- BasicBlock *CatchHandler;
- BasicBlock *NextBB;
- Constant *Selector;
- if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) {
- // Split the edge if there are multiple predecessors. This creates a place
- // where we can insert EH recovery code.
- if (!CatchHandler->getSinglePredecessor()) {
- DEBUG(dbgs() << "splitting EH return edge from " << BB->getName()
- << " to " << CatchHandler->getName() << '\n');
- BBI = CatchHandler = SplitCriticalEdge(
- BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler));
- }
- EHReturnBlocks.insert(CatchHandler);
- }
- }
-}
-void WinEHPrepare::identifyEHBlocks(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads) {
- DEBUG(dbgs() << "Demoting values live across exception handlers in function "
- << F.getName() << '\n');
-
- // Build a set of all non-exceptional blocks and exceptional blocks.
- // - Non-exceptional blocks are blocks reachable from the entry block while
- // not following invoke unwind edges.
- // - Exceptional blocks are blocks reachable from landingpads. Analysis does
- // not follow llvm.eh.endcatch blocks, which mark a transition from
- // exceptional to normal control.
-
- if (Personality == EHPersonality::MSVC_CXX)
- findCXXEHReturnPoints(F, EHReturnBlocks);
- else
- findSEHEHReturnPoints(F, EHReturnBlocks);
-
- DEBUG({
- dbgs() << "identified the following blocks as EH return points:\n";
- for (BasicBlock *BB : EHReturnBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
-// Join points should not have phis at this point, unless they are a
-// landingpad, in which case we will demote their phis later.
-#ifndef NDEBUG
- for (BasicBlock *BB : EHReturnBlocks)
- assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) &&
- "non-lpad EH return block has phi");
-#endif
-
- // Normal blocks are the blocks reachable from the entry block and all EH
- // return points.
- SetVector<BasicBlock *> Worklist;
- Worklist = EHReturnBlocks;
- Worklist.insert(&F.getEntryBlock());
- findReachableBlocks(NormalBlocks, Worklist, nullptr);
- DEBUG({
- dbgs() << "marked the following blocks as normal:\n";
- for (BasicBlock *BB : NormalBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
- // Exceptional blocks are the blocks reachable from landingpads that don't
- // cross EH return points.
- Worklist.clear();
- for (auto *LPI : LPads)
- Worklist.insert(LPI->getParent());
- findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks);
- DEBUG({
- dbgs() << "marked the following blocks as exceptional:\n";
- for (BasicBlock *BB : EHBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
-}
-
-/// Ensure that all values live into and out of exception handlers are stored
-/// in memory.
-/// FIXME: This falls down when values are defined in one handler and live into
-/// another handler. For example, a cleanup defines a value used only by a
-/// catch handler.
-void WinEHPrepare::demoteValuesLiveAcrossHandlers(
- Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
- DEBUG(dbgs() << "Demoting values live across exception handlers in function "
- << F.getName() << '\n');
-
- // identifyEHBlocks() should have been called before this function.
- assert(!NormalBlocks.empty());
-
- // Try to avoid demoting EH pointer and selector values. They get in the way
- // of our pattern matching.
- SmallPtrSet<Instruction *, 10> EHVals;
- for (BasicBlock &BB : F) {
- LandingPadInst *LP = BB.getLandingPadInst();
- if (!LP)
- continue;
- EHVals.insert(LP);
- for (User *U : LP->users()) {
- auto *EI = dyn_cast<ExtractValueInst>(U);
- if (!EI)
- continue;
- EHVals.insert(EI);
- for (User *U2 : EI->users()) {
- if (auto *PN = dyn_cast<PHINode>(U2))
- EHVals.insert(PN);
- }
+ auto &BBColors = BlockColors[&BB];
+ assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+
+ BasicBlock *FuncletUnwindDest;
+ auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+ assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+ if (!FuncletPad)
+ FuncletUnwindDest = nullptr;
+ else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+ FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+ else
+ llvm_unreachable("unexpected funclet pad!");
+
+ BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+ int BaseState = -1;
+ if (FuncletUnwindDest == InvokeUnwindDest) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
}
- }
- SetVector<Argument *> ArgsToDemote;
- SetVector<Instruction *> InstrsToDemote;
- for (BasicBlock &BB : F) {
- bool IsNormalBB = NormalBlocks.count(&BB);
- bool IsEHBB = EHBlocks.count(&BB);
- if (!IsNormalBB && !IsEHBB)
- continue; // Blocks that are neither normal nor EH are unreachable.
- for (Instruction &I : BB) {
- for (Value *Op : I.operands()) {
- // Don't demote static allocas, constants, and labels.
- if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op))
- continue;
- auto *AI = dyn_cast<AllocaInst>(Op);
- if (AI && AI->isStaticAlloca())
- continue;
-
- if (auto *Arg = dyn_cast<Argument>(Op)) {
- if (IsEHBB) {
- DEBUG(dbgs() << "Demoting argument " << *Arg
- << " used by EH instr: " << I << "\n");
- ArgsToDemote.insert(Arg);
- }
- continue;
- }
-
- // Don't demote EH values.
- auto *OpI = cast<Instruction>(Op);
- if (EHVals.count(OpI))
- continue;
-
- BasicBlock *OpBB = OpI->getParent();
- // If a value is produced and consumed in the same BB, we don't need to
- // demote it.
- if (OpBB == &BB)
- continue;
- bool IsOpNormalBB = NormalBlocks.count(OpBB);
- bool IsOpEHBB = EHBlocks.count(OpBB);
- if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) {
- DEBUG({
- dbgs() << "Demoting instruction live in-out from EH:\n";
- dbgs() << "Instr: " << *OpI << '\n';
- dbgs() << "User: " << I << '\n';
- });
- InstrsToDemote.insert(OpI);
- }
- }
- }
- }
-
- // Demote values live into and out of handlers.
- // FIXME: This demotion is inefficient. We should insert spills at the point
- // of definition, insert one reload in each handler that uses the value, and
- // insert reloads in the BB used to rejoin normal control flow.
- Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt();
- for (Instruction *I : InstrsToDemote)
- DemoteRegToStack(*I, false, AllocaInsertPt);
-
- // Demote arguments separately, and only for uses in EH blocks.
- for (Argument *Arg : ArgsToDemote) {
- auto *Slot = new AllocaInst(Arg->getType(), nullptr,
- Arg->getName() + ".reg2mem", AllocaInsertPt);
- SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end());
- for (User *U : Users) {
- auto *I = dyn_cast<Instruction>(U);
- if (I && EHBlocks.count(I->getParent())) {
- auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I);
- U->replaceUsesOfWith(Arg, Reload);
- }
+ if (BaseState != -1) {
+ FuncInfo.InvokeStateMap[II] = BaseState;
+ } else {
+ Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+ assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+ FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
}
- new StoreInst(Arg, Slot, AllocaInsertPt);
- }
-
- // Demote landingpad phis, as the landingpad will be removed from the machine
- // CFG.
- for (LandingPadInst *LPI : LPads) {
- BasicBlock *BB = LPI->getParent();
- while (auto *Phi = dyn_cast<PHINode>(BB->begin()))
- DemotePHIToStack(Phi, AllocaInsertPt);
}
-
- DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and "
- << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n");
}
-bool WinEHPrepare::prepareExceptionHandlers(
- Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
- // Don't run on functions that are already prepared.
- for (LandingPadInst *LPad : LPads) {
- BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : *LPadBB)
- if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>()))
- return false;
- }
-
- identifyEHBlocks(F, LPads);
- demoteValuesLiveAcrossHandlers(F, LPads);
-
- // These containers are used to re-map frame variables that are used in
- // outlined catch and cleanup handlers. They will be populated as the
- // handlers are outlined.
- FrameVarInfoMap FrameVarInfo;
-
- bool HandlersOutlined = false;
-
- Module *M = F.getParent();
- LLVMContext &Context = M->getContext();
-
- // Create a new function to receive the handler contents.
- PointerType *Int8PtrType = Type::getInt8PtrTy(Context);
- Type *Int32Type = Type::getInt32Ty(Context);
- Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions);
-
- if (isAsynchronousEHPersonality(Personality)) {
- // FIXME: Switch the ehptr type to i32 and then switch this.
- SEHExceptionCodeSlot =
- new AllocaInst(Int8PtrType, nullptr, "seh_exception_code",
- F.getEntryBlock().getFirstInsertionPt());
+// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
+// to. If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+ Value *ParentPad) {
+ const TerminatorInst *TI = BB->getTerminator();
+ if (isa<InvokeInst>(TI))
+ return nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CatchSwitch->getParentPad() != ParentPad)
+ return nullptr;
+ return BB;
}
+ assert(!TI->isEHPad() && "unexpected EHPad!");
+ auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+ if (CleanupPad->getParentPad() != ParentPad)
+ return nullptr;
+ return CleanupPad->getParent();
+}
- // In order to handle the case where one outlined catch handler returns
- // to a block within another outlined catch handler that would otherwise
- // be unreachable, we need to outline the nested landing pad before we
- // outline the landing pad which encloses it.
- if (!isAsynchronousEHPersonality(Personality))
- std::sort(LPads.begin(), LPads.end(),
- [this](LandingPadInst *const &L, LandingPadInst *const &R) {
- return DT->properlyDominates(R->getParent(), L->getParent());
- });
-
- // This container stores the llvm.eh.recover and IndirectBr instructions
- // that make up the body of each landing pad after it has been outlined.
- // We need to defer the population of the target list for the indirectbr
- // until all landing pads have been outlined so that we can handle the
- // case of blocks in the target that are reached only from nested
- // landing pads.
- SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls;
-
- for (LandingPadInst *LPad : LPads) {
- // Look for evidence that this landingpad has already been processed.
- bool LPadHasActionList = false;
- BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : *LPadBB) {
- if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) {
- LPadHasActionList = true;
- break;
- }
- }
-
- // If we've already outlined the handlers for this landingpad,
- // there's nothing more to do here.
- if (LPadHasActionList)
- continue;
-
- // If either of the values in the aggregate returned by the landing pad is
- // extracted and stored to memory, promote the stored value to a register.
- promoteLandingPadValues(LPad);
-
- LandingPadActions Actions;
- mapLandingPadBlocks(LPad, Actions);
-
- HandlersOutlined |= !Actions.actions().empty();
- for (ActionHandler *Action : Actions) {
- if (Action->hasBeenProcessed())
- continue;
- BasicBlock *StartBB = Action->getStartBlock();
-
- // SEH doesn't do any outlining for catches. Instead, pass the handler
- // basic block addr to llvm.eh.actions and list the block as a return
- // target.
- if (isAsynchronousEHPersonality(Personality)) {
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- processSEHCatchHandler(CatchAction, StartBB);
- continue;
- }
- }
-
- outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo);
- }
-
- // Split the block after the landingpad instruction so that it is just a
- // call to llvm.eh.actions followed by indirectbr.
- assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed");
- SplitBlock(LPadBB, LPad->getNextNode(), DT);
- // Erase the branch inserted by the split so we can insert indirectbr.
- LPadBB->getTerminator()->eraseFromParent();
-
- // Replace all extracted values with undef and ultimately replace the
- // landingpad with undef.
- SmallVector<Instruction *, 4> SEHCodeUses;
- SmallVector<Instruction *, 4> EHUndefs;
- for (User *U : LPad->users()) {
- auto *E = dyn_cast<ExtractValueInst>(U);
- if (!E)
- continue;
- assert(E->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- unsigned Idx = *E->idx_begin();
- assert((Idx == 0 || Idx == 1) && "unexpected index");
- if (Idx == 0 && isAsynchronousEHPersonality(Personality))
- SEHCodeUses.push_back(E);
- else
- EHUndefs.push_back(E);
- }
- for (Instruction *E : EHUndefs) {
- E->replaceAllUsesWith(UndefValue::get(E->getType()));
- E->eraseFromParent();
- }
- LPad->replaceAllUsesWith(UndefValue::get(LPad->getType()));
-
- // Rewrite uses of the exception pointer to loads of an alloca.
- while (!SEHCodeUses.empty()) {
- Instruction *E = SEHCodeUses.pop_back_val();
- SmallVector<Use *, 4> Uses;
- for (Use &U : E->uses())
- Uses.push_back(&U);
- for (Use *U : Uses) {
- auto *I = cast<Instruction>(U->getUser());
- if (isa<ResumeInst>(I))
- continue;
- if (auto *Phi = dyn_cast<PHINode>(I))
- SEHCodeUses.push_back(Phi);
- else
- U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I));
- }
- E->replaceAllUsesWith(UndefValue::get(E->getType()));
- E->eraseFromParent();
- }
-
- // Add a call to describe the actions for this landing pad.
- std::vector<Value *> ActionArgs;
- for (ActionHandler *Action : Actions) {
- // Action codes from docs are: 0 cleanup, 1 catch.
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
- ActionArgs.push_back(CatchAction->getSelector());
- // Find the frame escape index of the exception object alloca in the
- // parent.
- int FrameEscapeIdx = -1;
- Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
- if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
- auto I = FrameVarInfo.find(EHObj);
- assert(I != FrameVarInfo.end() &&
- "failed to map llvm.eh.begincatch var");
- FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
- }
- ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
- } else {
- ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
- }
- ActionArgs.push_back(Action->getHandlerBlockOrFunc());
- }
- CallInst *Recover =
- CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB);
-
- SetVector<BasicBlock *> ReturnTargets;
- for (ActionHandler *Action : Actions) {
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- const auto &CatchTargets = CatchAction->getReturnTargets();
- ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end());
- }
- }
- IndirectBrInst *Branch =
- IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB);
- for (BasicBlock *Target : ReturnTargets)
- Branch->addDestination(Target);
-
- if (!isAsynchronousEHPersonality(Personality)) {
- // C++ EH must repopulate the targets later to handle the case of
- // targets that are reached indirectly through nested landing pads.
- LPadImpls.push_back(std::make_pair(Recover, Branch));
- }
-
- } // End for each landingpad
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
- // If nothing got outlined, there is no more processing to be done.
- if (!HandlersOutlined)
- return false;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revist catch funclets!");
- // Replace any nested landing pad stubs with the correct action handler.
- // This must be done before we remove unreachable blocks because it
- // cleans up references to outlined blocks that will be deleted.
- for (auto &LPadPair : NestedLPtoOriginalLP)
- completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
- NestedLPtoOriginalLP.clear();
-
- // Update the indirectbr instructions' target lists if necessary.
- SetVector<BasicBlock*> CheckedTargets;
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (auto &LPadImplPair : LPadImpls) {
- IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first);
- IndirectBrInst *Branch = LPadImplPair.second;
-
- // Get a list of handlers called by
- parseEHActions(Recover, ActionList);
-
- // Add an indirect branch listing possible successors of the catch handlers.
- SetVector<BasicBlock *> ReturnTargets;
- for (const auto &Action : ActionList) {
- if (auto *CA = dyn_cast<CatchHandler>(Action.get())) {
- Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc());
- getPossibleReturnTargets(&F, Handler, ReturnTargets);
- }
- }
- ActionList.clear();
- // Clear any targets we already knew about.
- for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) {
- BasicBlock *KnownTarget = Branch->getDestination(I);
- if (ReturnTargets.count(KnownTarget))
- ReturnTargets.remove(KnownTarget);
+ SmallVector<const CatchPadInst *, 2> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(CatchPad);
}
- for (BasicBlock *Target : ReturnTargets) {
- Branch->addDestination(Target);
- // The target may be a block that we excepted to get pruned.
- // If it is, it may contain a call to llvm.eh.endcatch.
- if (CheckedTargets.insert(Target)) {
- // Earlier preparations guarantee that all calls to llvm.eh.endcatch
- // will be followed by an unconditional branch.
- auto *Br = dyn_cast<BranchInst>(Target->getTerminator());
- if (Br && Br->isUnconditional() &&
- Br != Target->getFirstNonPHIOrDbgOrLifetime()) {
- Instruction *Prev = Br->getPrevNode();
- if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>()))
- Prev->eraseFromParent();
- }
+ int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryLow);
+ int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+ // catchpads are separate funclets in C++ EH due to the way rethrow works.
+ int TryHigh = CatchLow - 1;
+ for (const auto *CatchPad : Handlers) {
+ FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+ if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+ if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+ CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
}
}
- }
- LPadImpls.clear();
-
- F.addFnAttr("wineh-parent", F.getName());
-
- // Delete any blocks that were only used by handlers that were outlined above.
- removeUnreachableBlocks(F);
+ int CatchHigh = FuncInfo.getLastStateNumber();
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+ DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+ DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
+ DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+ << '\n');
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
- BasicBlock *Entry = &F.getEntryBlock();
- IRBuilder<> Builder(F.getParent()->getContext());
- Builder.SetInsertPoint(Entry->getFirstInsertionPt());
-
- Function *FrameEscapeFn =
- Intrinsic::getDeclaration(M, Intrinsic::localescape);
- Function *RecoverFrameFn =
- Intrinsic::getDeclaration(M, Intrinsic::localrecover);
- SmallVector<Value *, 8> AllocasToEscape;
-
- // Scan the entry block for an existing call to llvm.localescape. We need to
- // keep escaping those objects.
- for (Instruction &I : F.front()) {
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::localescape) {
- auto Args = II->arg_operands();
- AllocasToEscape.append(Args.begin(), Args.end());
- II->eraseFromParent();
- break;
- }
- }
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
- // Finally, replace all of the temporary allocas for frame variables used in
- // the outlined handlers with calls to llvm.localrecover.
- for (auto &VarInfoEntry : FrameVarInfo) {
- Value *ParentVal = VarInfoEntry.first;
- TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
- AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal);
-
- // FIXME: We should try to sink unescaped allocas from the parent frame into
- // the child frame. If the alloca is escaped, we have to use the lifetime
- // markers to ensure that the alloca is only live within the child frame.
-
- // Add this alloca to the list of things to escape.
- AllocasToEscape.push_back(ParentAlloca);
-
- // Next replace all outlined allocas that are mapped to it.
- for (AllocaInst *TempAlloca : Allocas) {
- if (TempAlloca == getCatchObjectSentinel())
- continue; // Skip catch parameter sentinels.
- Function *HandlerFn = TempAlloca->getParent()->getParent();
- llvm::Value *FP = HandlerToParentFP[HandlerFn];
- assert(FP);
-
- // FIXME: Sink this localrecover into the blocks where it is used.
- Builder.SetInsertPoint(TempAlloca);
- Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
- Value *RecoverArgs[] = {
- Builder.CreateBitCast(&F, Int8PtrType, ""), FP,
- llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)};
- Instruction *RecoveredAlloca =
- Builder.CreateCall(RecoverFrameFn, RecoverArgs);
-
- // Add a pointer bitcast if the alloca wasn't an i8.
- if (RecoveredAlloca->getType() != TempAlloca->getType()) {
- RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8");
- RecoveredAlloca = cast<Instruction>(
- Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType()));
+ int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB)) {
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CleanupPad->getParentPad()))) {
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
}
- TempAlloca->replaceAllUsesWith(RecoveredAlloca);
- TempAlloca->removeFromParent();
- RecoveredAlloca->takeName(TempAlloca);
- delete TempAlloca;
}
- } // End for each FrameVarInfo entry.
-
- // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry
- // block.
- Builder.SetInsertPoint(&F.getEntryBlock().back());
- Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
-
- if (SEHExceptionCodeSlot) {
- if (isAllocaPromotable(SEHExceptionCodeSlot)) {
- SmallPtrSet<BasicBlock *, 4> UserBlocks;
- for (User *U : SEHExceptionCodeSlot->users()) {
- if (auto *Inst = dyn_cast<Instruction>(U))
- UserBlocks.insert(Inst->getParent());
- }
- PromoteMemToReg(SEHExceptionCodeSlot, *DT);
- // After the promotion, kill off dead instructions.
- for (BasicBlock *BB : UserBlocks)
- SimplifyInstructionsInBlock(BB, LibInfo);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the MSVC++ personality cannot "
+ "contain exceptional actions");
}
}
+}
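
As a hedged illustration of the numbering this recursion produces (worked out from the code above, not stated in the commit): for a function whose only EH construct is one top-level try/catch, the maps would end up roughly as follows.

// Hypothetical outcome for:  try { f(); } catch (...) { g(); }
// CxxUnwindMap: state 0 = the try body   (ToState -1, no cleanup)
//               state 1 = the catch body (ToState -1, no cleanup)
// EHPadStateMap[catchswitch]    = 0  -> invokes in the try body get state 0
// FuncletBaseStateMap[catchpad] = 1  -> invokes in the catch body get state 1
// TryBlockMap: one entry with TryLow=0, TryHigh=0, CatchHigh=1 and the single
//              catchpad in its HandlerArray.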
- // Clean up the handler action maps we created for this function
- DeleteContainerSeconds(CatchHandlerMap);
- CatchHandlerMap.clear();
- DeleteContainerSeconds(CleanupHandlerMap);
- CleanupHandlerMap.clear();
- HandlerToParentFP.clear();
- DT = nullptr;
- LibInfo = nullptr;
- SEHExceptionCodeSlot = nullptr;
- EHBlocks.clear();
- NormalBlocks.clear();
- EHReturnBlocks.clear();
-
- return HandlersOutlined;
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+ const Function *Filter, const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = false;
+ Entry.Filter = Filter;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
}
-void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) {
- // If the return values of the landing pad instruction are extracted and
- // stored to memory, we want to promote the store locations to reg values.
- SmallVector<AllocaInst *, 2> EHAllocas;
-
- // The landingpad instruction returns an aggregate value. Typically, its
- // value will be passed to a pair of extract value instructions and the
- // results of those extracts are often passed to store instructions.
- // In unoptimized code the stored value will often be loaded and then stored
- // again.
- for (auto *U : LPad->users()) {
- ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
- if (!Extract)
- continue;
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+ const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = true;
+ Entry.Filter = nullptr;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
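
Taken together, the two helpers above fill the same SEHUnwindMap with the two SEH flavors; only the IsFinally and Filter fields differ. A hedged illustration of the assumed layout (not taken from the commit) for two top-level constructs:

// __try { f(); } __except (flt()) { h(); }
//   -> entry { ToState=-1, IsFinally=false, Filter=flt,     Handler=__except block }
// __try { f(); } __finally { fin(); }
//   -> entry { ToState=-1, IsFinally=true,  Filter=nullptr, Handler=__finally block }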
- for (auto *EU : Extract->users()) {
- if (auto *Store = dyn_cast<StoreInst>(EU)) {
- auto *AV = cast<AllocaInst>(Store->getPointerOperand());
- EHAllocas.push_back(AV);
- }
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+  assert(BB->isEHPad() && "not a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revist catch funclets!");
+
+ // Extract the filter function and the __except basic block and create a
+ // state for them.
+ assert(CatchSwitch->getNumHandlers() == 1 &&
+ "SEH doesn't have multiple handlers per __try");
+ const auto *CatchPad =
+ cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+ const BasicBlock *CatchPadBB = CatchPad->getParent();
+ const Constant *FilterOrNull =
+ cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ assert((Filter || FilterOrNull->isNullValue()) &&
+ "unexpected filter value");
+ int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+ // Everything in the __try block uses TryState as its parent state.
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+ << CatchPadBB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryState);
+
+ // Everything in the __except block unwinds to ParentState, just like code
+ // outside the __try.
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+ if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+ if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+ CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
}
- }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
- // We can't do this without a dominator tree.
- assert(DT);
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
- if (!EHAllocas.empty()) {
- PromoteMemToReg(EHAllocas, *DT);
- EHAllocas.clear();
+ int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock =
+ getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the SEH personality cannot "
+ "contain exceptional actions");
+ }
}
+}
- // After promotion, some extracts may be trivially dead. Remove them.
- SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
- for (auto *U : Users)
- RecursivelyDeleteTriviallyDeadInstructions(U);
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+ return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+ CatchSwitch->unwindsToCaller();
+ if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+ return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+ getCleanupRetUnwindDest(CleanupPad) == nullptr;
+ if (isa<CatchPadInst>(EHPad))
+ return false;
+ llvm_unreachable("unexpected EHPad!");
}
-void WinEHPrepare::getPossibleReturnTargets(Function *ParentF,
- Function *HandlerF,
- SetVector<BasicBlock*> &Targets) {
- for (BasicBlock &BB : *HandlerF) {
- // If the handler contains landing pads, check for any
- // handlers that may return directly to a block in the
- // parent function.
- if (auto *LPI = BB.getLandingPadInst()) {
- IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode());
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(Recover, ActionList);
- for (const auto &Action : ActionList) {
- if (auto *CH = dyn_cast<CatchHandler>(Action.get())) {
- Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc());
- getPossibleReturnTargets(ParentF, NestedF, Targets);
- }
- }
- }
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Don't compute state numbers twice.
+ if (!FuncInfo.SEHUnwindMap.empty())
+ return;
- auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (!Ret)
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
continue;
-
- // Handler functions must always return a block address.
- BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
-
- // If this is the handler for a nested landing pad, the
- // return address may have been remapped to a block in the
- // parent handler. We're not interested in those.
- if (BA->getFunction() != ParentF)
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
continue;
-
- Targets.insert(BA->getBasicBlock());
+ ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
}
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
- LandingPadInst *OutlinedLPad,
- const LandingPadInst *OriginalLPad,
- FrameVarInfoMap &FrameVarInfo) {
- // Get the nested block and erase the unreachable instruction that was
- // temporarily inserted as its terminator.
- LLVMContext &Context = ParentFn->getContext();
- BasicBlock *OutlinedBB = OutlinedLPad->getParent();
- // If the nested landing pad was outlined before the landing pad that enclosed
- // it, it will already be in outlined form. In that case, we just need to see
- // if the returns and the enclosing branch instruction need to be updated.
- IndirectBrInst *Branch =
- dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator());
- if (!Branch) {
- // If the landing pad wasn't in outlined form, it should be a stub with
- // an unreachable terminator.
- assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
- OutlinedBB->getTerminator()->eraseFromParent();
- // That should leave OutlinedLPad as the last instruction in its block.
- assert(&OutlinedBB->back() == OutlinedLPad);
- }
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
- // The original landing pad will have already had its action intrinsic
- // built by the outlining loop. We need to clone that into the outlined
- // location. It may also be necessary to add references to the exception
- // variables to the outlined handler in which this landing pad is nested
- // and remap return instructions in the nested handlers that should return
- // to an address in the outlined handler.
- Function *OutlinedHandlerFn = OutlinedBB->getParent();
- BasicBlock::const_iterator II = OriginalLPad;
- ++II;
- // The instruction after the landing pad should now be a call to eh.actions.
- const Instruction *Recover = II;
- const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
-
- // Remap the return target in the nested handler.
- SmallVector<BlockAddress *, 4> ActionTargets;
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(EHActions, ActionList);
- for (const auto &Action : ActionList) {
- auto *Catch = dyn_cast<CatchHandler>(Action.get());
- if (!Catch)
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
continue;
- // The dyn_cast to function here selects C++ catch handlers and skips
- // SEH catch handlers.
- auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
- if (!Handler)
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
continue;
- // Visit all the return instructions, looking for places that return
- // to a location within OutlinedHandlerFn.
- for (BasicBlock &NestedHandlerBB : *Handler) {
- auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
- if (!Ret)
- continue;
-
- // Handler functions must always return a block address.
- BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
- // The original target will have been in the main parent function,
- // but if it is the address of a block that has been outlined, it
- // should be a block that was outlined into OutlinedHandlerFn.
- assert(BA->getFunction() == ParentFn);
-
- // Ignore targets that aren't part of an outlined handler function.
- if (!LPadTargetBlocks.count(BA->getBasicBlock()))
- continue;
-
- // If the return value is the address of a block that we
- // previously outlined into the parent handler function, replace
- // the return instruction and add the mapped target to the list
- // of possible return addresses.
- BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
- assert(MappedBB->getParent() == OutlinedHandlerFn);
- BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
- Ret->eraseFromParent();
- ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
- ActionTargets.push_back(NewBA);
- }
- }
- ActionList.clear();
-
- if (Branch) {
- // If the landing pad was already in outlined form, just update its targets.
- for (unsigned int I = Branch->getNumDestinations(); I > 0; --I)
- Branch->removeDestination(I);
- // Add the previously collected action targets.
- for (auto *Target : ActionTargets)
- Branch->addDestination(Target->getBasicBlock());
- } else {
- // If the landing pad was previously stubbed out, fill in its outlined form.
- IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone());
- OutlinedBB->getInstList().push_back(NewEHActions);
-
- // Insert an indirect branch into the outlined landing pad BB.
- IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB);
- // Add the previously collected action targets.
- for (auto *Target : ActionTargets)
- IBr->addDestination(Target->getBasicBlock());
- }
-}
-
-// This function examines a block to determine whether the block ends with a
-// conditional branch to a catch handler based on a selector comparison.
-// This function is used both by the WinEHPrepare::findSelectorComparison() and
-// WinEHCleanupDirector::handleTypeIdFor().
-static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
- Constant *&Selector, BasicBlock *&NextBB) {
- ICmpInst::Predicate Pred;
- BasicBlock *TBB, *FBB;
- Value *LHS, *RHS;
-
- if (!match(BB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
- return false;
-
- if (!match(LHS,
- m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
- !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
- return false;
-
- if (Pred == CmpInst::ICMP_EQ) {
- CatchHandler = TBB;
- NextBB = FBB;
- return true;
- }
-
- if (Pred == CmpInst::ICMP_NE) {
- CatchHandler = FBB;
- NextBB = TBB;
- return true;
+ calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
}
- return false;
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-static bool isCatchBlock(BasicBlock *BB) {
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>()))
- return true;
- }
- return false;
-}
-
-static BasicBlock *createStubLandingPad(Function *Handler) {
- // FIXME: Finish this!
- LLVMContext &Context = Handler->getContext();
- BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
- Handler->getBasicBlockList().push_back(StubBB);
- IRBuilder<> Builder(StubBB);
- LandingPadInst *LPad = Builder.CreateLandingPad(
- llvm::StructType::get(Type::getInt8PtrTy(Context),
- Type::getInt32Ty(Context), nullptr),
- 0);
- // Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
- Function *ActionIntrin =
- Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
- Builder.CreateCall(ActionIntrin, {}, "recover");
- LPad->setCleanup(true);
- Builder.CreateUnreachable();
- return StubBB;
-}
-
-// Cycles through the blocks in an outlined handler function looking for an
-// invoke instruction and inserts an invoke of llvm.donothing with an empty
-// landing pad if none is found. The code that generates the .xdata tables for
-// the handler needs at least one landing pad to identify the parent function's
-// personality.
-void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) {
- ReturnInst *Ret = nullptr;
- UnreachableInst *Unreached = nullptr;
- for (BasicBlock &BB : *Handler) {
- TerminatorInst *Terminator = BB.getTerminator();
- // If we find an invoke, there is nothing to be done.
- auto *II = dyn_cast<InvokeInst>(Terminator);
- if (II)
- return;
- // If we've already recorded a return instruction, keep looking for invokes.
- if (!Ret)
- Ret = dyn_cast<ReturnInst>(Terminator);
- // If we haven't recorded an unreachable instruction, try this terminator.
- if (!Unreached)
- Unreached = dyn_cast<UnreachableInst>(Terminator);
- }
-
- // If we got this far, the handler contains no invokes. We should have seen
- // at least one return or unreachable instruction. We'll insert an invoke of
- // llvm.donothing ahead of that instruction.
- assert(Ret || Unreached);
- TerminatorInst *Term;
- if (Ret)
- Term = Ret;
- else
- Term = Unreached;
- BasicBlock *OldRetBB = Term->getParent();
- BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT);
- // SplitBlock adds an unconditional branch instruction at the end of the
- // parent block. We want to replace that with an invoke call, so we can
- // erase it now.
- OldRetBB->getTerminator()->eraseFromParent();
- BasicBlock *StubLandingPad = createStubLandingPad(Handler);
- Function *F =
- Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing);
- InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB);
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
+ ClrHandlerType HandlerType, uint32_t TypeToken,
+ const BasicBlock *Handler) {
+ ClrEHUnwindMapEntry Entry;
+ Entry.Parent = ParentState;
+ Entry.Handler = Handler;
+ Entry.HandlerType = HandlerType;
+ Entry.TypeToken = TypeToken;
+ FuncInfo.ClrEHUnwindMap.push_back(Entry);
+ return FuncInfo.ClrEHUnwindMap.size() - 1;
}
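Each call appends one ClrEHUnwindMapEntry and returns its index, which becomes the state number for that handler; Parent links it to the enclosing state. A hypothetical result for a catch nested inside a finally (values illustrative, not from the patch):

    // ClrEHUnwindMap after two addClrEHHandler calls (sketch):
    //   state 0: HandlerType = Finally, Parent = -1   (unwinds to caller)
    //   state 1: HandlerType = Catch,   Parent =  0,  TypeToken = <token>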
-// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering
-// usually doesn't build LLVM IR, so that's probably the wrong place.
-Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy,
- const Twine &Name, Module *M,
- Value *&ParentFP) {
- // x64 uses a two-argument prototype where the parent FP is the second
- // argument. x86 uses no arguments, just the incoming EBP value.
- LLVMContext &Context = M->getContext();
- Type *Int8PtrType = Type::getInt8PtrTy(Context);
- FunctionType *FnType;
- if (TheTriple.getArch() == Triple::x86_64) {
- Type *ArgTys[2] = {Int8PtrType, Int8PtrType};
- FnType = FunctionType::get(RetTy, ArgTys, false);
- } else {
- FnType = FunctionType::get(RetTy, None, false);
- }
-
- Function *Handler =
- Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M);
- BasicBlock *Entry = BasicBlock::Create(Context, "entry");
- Handler->getBasicBlockList().push_front(Entry);
- if (TheTriple.getArch() == Triple::x86_64) {
- ParentFP = &(Handler->getArgumentList().back());
- } else {
- assert(M);
- Function *FrameAddressFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
- Function *RecoverFPFn =
- Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp);
- IRBuilder<> Builder(&Handler->getEntryBlock());
- Value *EBP =
- Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp");
- Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType);
- ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP});
- }
- return Handler;
-}
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
-bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
- LandingPadInst *LPad, BasicBlock *StartBB,
- FrameVarInfoMap &VarInfo) {
- Module *M = SrcFn->getParent();
- LLVMContext &Context = M->getContext();
- Type *Int8PtrType = Type::getInt8PtrTy(Context);
-
- // Create a new function to receive the handler contents.
- Value *ParentFP;
- Function *Handler;
- if (Action->getType() == Catch) {
- Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M,
- ParentFP);
- } else {
- Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context),
- SrcFn->getName() + ".cleanup", M, ParentFP);
- }
- Handler->setPersonalityFn(SrcFn->getPersonalityFn());
- HandlerToParentFP[Handler] = ParentFP;
- Handler->addFnAttr("wineh-parent", SrcFn->getName());
- BasicBlock *Entry = &Handler->getEntryBlock();
-
- // Generate a standard prolog to setup the frame recovery structure.
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Entry);
- Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
-
- std::unique_ptr<WinEHCloningDirectorBase> Director;
-
- ValueToValueMapTy VMap;
-
- LandingPadMap &LPadMap = LPadMaps[LPad];
- if (!LPadMap.isInitialized())
- LPadMap.mapLandingPad(LPad);
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- Constant *Sel = CatchAction->getSelector();
- Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo,
- LPadMap, NestedLPtoOriginalLP, DT,
- EHBlocks));
- LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
- ConstantInt::get(Type::getInt32Ty(Context), 1));
- } else {
- Director.reset(
- new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap));
- LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
- UndefValue::get(Type::getInt32Ty(Context)));
- }
+ SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
- SmallVector<ReturnInst *, 8> Returns;
- ClonedCodeInfo OutlinedFunctionInfo;
-
- // If the start block contains PHI nodes, we need to map them.
- BasicBlock::iterator II = StartBB->begin();
- while (auto *PN = dyn_cast<PHINode>(II)) {
- bool Mapped = false;
- // Look for PHI values that we have already mapped (such as the selector).
- for (Value *Val : PN->incoming_values()) {
- if (VMap.count(Val)) {
- VMap[PN] = VMap[Val];
- Mapped = true;
- }
- }
- // If we didn't find a match for this value, map it as an undef.
- if (!Mapped) {
- VMap[PN] = UndefValue::get(PN->getType());
- }
- ++II;
+ // Each pad needs to be able to refer to its parent, so scan the function
+ // looking for top-level handlers and seed the worklist with them.
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ if (BB.isLandingPad())
+ report_fatal_error("CoreCLR EH cannot use landingpads");
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ // Queue this with the sentinel parent state -1, meaning it unwinds to caller.
+ Worklist.emplace_back(FirstNonPHI, -1);
}
- // The landing pad value may be used by PHI nodes. It will ultimately be
- // eliminated, but we need it in the map for intermediate handling.
- VMap[LPad] = UndefValue::get(LPad->getType());
-
- // Skip over PHIs and, if applicable, landingpad instructions.
- II = StartBB->getFirstInsertionPt();
-
- CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
- /*ModuleLevelChanges=*/false, Returns, "",
- &OutlinedFunctionInfo, Director.get());
-
- // Move all the instructions in the cloned "entry" block into our entry block.
- // Depending on how the parent function was laid out, the block that will
- // correspond to the outlined entry block may not be the first block in the
- // list. We can recognize it, however, as the cloned block which has no
- // predecessors. Any other block wouldn't have been cloned if it didn't
- // have a predecessor which was also cloned.
- Function::iterator ClonedIt = std::next(Function::iterator(Entry));
- while (!pred_empty(ClonedIt))
- ++ClonedIt;
- BasicBlock *ClonedEntryBB = ClonedIt;
- assert(ClonedEntryBB);
- Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList());
- ClonedEntryBB->eraseFromParent();
-
- // Make sure we can identify the handler's personality later.
- addStubInvokeToHandlerIfNeeded(Handler);
-
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- WinEHCatchDirector *CatchDirector =
- reinterpret_cast<WinEHCatchDirector *>(Director.get());
- CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
- CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
-
- // Look for blocks that are not part of the landing pad that we just
- // outlined but terminate with a call to llvm.eh.endcatch and a
- // branch to a block that is in the handler we just outlined.
- // These blocks will be part of a nested landing pad that intends to
- // return to an address in this handler. This case is best handled
- // after both landing pads have been outlined, so for now we'll just
- // save the association of the blocks in LPadTargetBlocks. The
- // return instructions which are created from these branches will be
- // replaced after all landing pads have been outlined.
- for (const auto MapEntry : VMap) {
- // VMap maps all values and blocks that were just cloned, but dead
- // blocks which were pruned will map to nullptr.
- if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ while (!Worklist.empty()) {
+ const Instruction *Pad;
+ int ParentState;
+ std::tie(Pad, ParentState) = Worklist.pop_back_val();
+
+ Value *ParentPad;
+ int PredState;
+ if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+ // A cleanup can have multiple exits; don't re-process after the first.
+ if (FuncInfo.EHPadStateMap.count(Cleanup))
continue;
- const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
- for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
- auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
- continue;
- BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
- --II;
- if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
- // This would indicate that a nested landing pad wants to return
- // to a block that is outlined into two different handlers.
- assert(!LPadTargetBlocks.count(MappedBB));
- LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ // CoreCLR personality uses arity to distinguish faults from finallies.
+ const BasicBlock *PadBlock = Cleanup->getParent();
+ ClrHandlerType HandlerType =
+ (Cleanup->getNumOperands() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
+ int NewState =
+ addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
+ FuncInfo.EHPadStateMap[Cleanup] = NewState;
+ // Propagate the new state to all predecessors of the cleanup.
+ ParentPad = Cleanup->getParentPad();
+ PredState = NewState;
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ SmallVector<const CatchPadInst *, 1> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(Catch);
+ }
+ FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
+ int NewState = ParentState;
+ for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
+ HandlerI != HandlerE; ++HandlerI) {
+ const CatchPadInst *Catch = *HandlerI;
+ const BasicBlock *PadBlock = Catch->getParent();
+ uint32_t TypeToken = static_cast<uint32_t>(
+ cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+ NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
+ TypeToken, PadBlock);
+ FuncInfo.EHPadStateMap[Catch] = NewState;
+ }
+ for (const auto *CatchPad : Handlers) {
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ Worklist.emplace_back(UserI, ParentState);
}
}
+ PredState = NewState;
+ ParentPad = CatchSwitch->getParentPad();
+ } else {
+ llvm_unreachable("Unexpected EH pad");
}
- } // End if (CatchAction)
-
- Action->setHandlerBlockOrFunc(Handler);
-
- return true;
-}
-
-/// This BB must end in a selector dispatch. All we need to do is pass the
-/// handler block to llvm.eh.actions and list it as a possible indirectbr
-/// target.
-void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
- BasicBlock *StartBB) {
- BasicBlock *HandlerBB;
- BasicBlock *NextBB;
- Constant *Selector;
- bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
- if (Res) {
- // If this was EH dispatch, this must be a conditional branch to the handler
- // block.
- // FIXME: Handle instructions in the dispatch block. Currently we drop them,
- // leading to crashes if some optimization hoists stuff here.
- assert(CatchAction->getSelector() && HandlerBB &&
- "expected catch EH dispatch");
- } else {
- // This must be a catch-all. Split the block after the landingpad.
- assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
- HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT);
- }
- IRBuilder<> Builder(HandlerBB->getFirstInsertionPt());
- Function *EHCodeFn = Intrinsic::getDeclaration(
- StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode);
- Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode");
- Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType());
- Builder.CreateStore(Code, SEHExceptionCodeSlot);
- CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
- TinyPtrVector<BasicBlock *> Targets(HandlerBB);
- CatchAction->setReturnTargets(Targets);
-}
-void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
- // Each instance of this class should only ever be used to map a single
- // landing pad.
- assert(OriginLPad == nullptr || OriginLPad == LPad);
-
- // If the landing pad has already been mapped, there's nothing more to do.
- if (OriginLPad == LPad)
- return;
-
- OriginLPad = LPad;
-
- // The landingpad instruction returns an aggregate value. Typically, its
- // value will be passed to a pair of extract value instructions and the
- // results of those extracts will have been promoted to reg values before
- // this routine is called.
- for (auto *U : LPad->users()) {
- const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
- if (!Extract)
- continue;
- assert(Extract->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- unsigned int Idx = *(Extract->idx_begin());
- assert((Idx == 0 || Idx == 1) &&
- "Unexpected operation: extracting an unknown landing pad element");
- if (Idx == 0) {
- ExtractedEHPtrs.push_back(Extract);
- } else if (Idx == 1) {
- ExtractedSelectors.push_back(Extract);
+ // Queue all predecessors with the given state.
+ for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
+ if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
+ Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
}
}
-}
-bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const {
- return BB->getLandingPadInst() == OriginLPad;
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
- if (Inst == OriginLPad)
- return true;
- for (auto *Extract : ExtractedEHPtrs) {
- if (Inst == Extract)
- return true;
- }
- for (auto *Extract : ExtractedSelectors) {
- if (Inst == Extract)
- return true;
- }
- return false;
-}
-
-void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
- Value *SelectorValue) const {
- // Remap all landing pad extract instructions to the specified values.
- for (auto *Extract : ExtractedEHPtrs)
- VMap[Extract] = EHPtrValue;
- for (auto *Extract : ExtractedSelectors)
- VMap[Extract] = SelectorValue;
-}
-
-static bool isLocalAddressCall(const Value *V) {
- return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>());
-}
-
-CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // If this is one of the boilerplate landing pad instructions, skip it.
- // The instruction will have already been remapped in VMap.
- if (LPadMap.isLandingPadSpecificInst(Inst))
- return CloningDirector::SkipInstruction;
-
- // Nested landing pads that have not already been outlined will be cloned as
- // stubs, with just the landingpad instruction and an unreachable instruction.
- // When all landingpads have been outlined, we'll replace this with the
- // llvm.eh.actions call and indirect branch created when the landing pad was
- // outlined.
- if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) {
- return handleLandingPad(VMap, LPad, NewBB);
- }
+void WinEHPrepare::colorFunclets(Function &F) {
+ BlockColors = colorEHFunclets(F);
- // Nested landing pads that have already been outlined will be cloned in their
- // outlined form, but we need to intercept the ibr instruction to filter out
- // targets that do not return to the handler we are outlining.
- if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) {
- return handleIndirectBr(VMap, IBr, NewBB);
- }
-
- if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
- return handleInvoke(VMap, Invoke, NewBB);
-
- if (auto *Resume = dyn_cast<ResumeInst>(Inst))
- return handleResume(VMap, Resume, NewBB);
-
- if (auto *Cmp = dyn_cast<CmpInst>(Inst))
- return handleCompare(VMap, Cmp, NewBB);
-
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
- return handleBeginCatch(VMap, Inst, NewBB);
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
- return handleEndCatch(VMap, Inst, NewBB);
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
- return handleTypeIdFor(VMap, Inst, NewBB);
-
- // When outlining llvm.localaddress(), remap that to the second argument,
- // which is the FP of the parent.
- if (isLocalAddressCall(Inst)) {
- VMap[Inst] = ParentFP;
- return CloningDirector::SkipInstruction;
- }
-
- // Continue with the default cloning behavior.
- return CloningDirector::CloneInstruction;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad(
- ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
- // If the instruction after the landing pad is a call to llvm.eh.actions
- // the landing pad has already been outlined. In this case, we should
- // clone it because it may return to a block in the handler we are
- // outlining now that would otherwise be unreachable. The landing pads
- // are sorted before outlining begins to enable this case to work
- // properly.
- const Instruction *NextI = LPad->getNextNode();
- if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>()))
- return CloningDirector::CloneInstruction;
-
- // If the landing pad hasn't been outlined yet, the landing pad we are
- // outlining now does not dominate it and so it cannot return to a block
- // in this handler. In that case, we can just insert a stub landing
- // pad now and patch it up later.
- Instruction *NewInst = LPad->clone();
- if (LPad->hasName())
- NewInst->setName(LPad->getName());
- // Save this correlation for later processing.
- NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad;
- VMap[LPad] = NewInst;
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(NewInst);
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // The argument to the call is some form of the first element of the
- // landingpad aggregate value, but that doesn't matter. It isn't used
- // here.
- // The second argument is an outparameter where the exception object will be
- // stored. Typically the exception object is a scalar, but it can be an
- // aggregate when catching by value.
- // FIXME: Leave something behind to indicate where the exception object lives
- // for this handler. Should it be part of llvm.eh.actions?
- assert(ExceptionObjectVar == nullptr && "Multiple calls to "
- "llvm.eh.begincatch found while "
- "outlining catch handler.");
- ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
- if (isa<ConstantPointerNull>(ExceptionObjectVar))
- return CloningDirector::SkipInstruction;
- assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() &&
- "catch parameter is not static alloca");
- Materializer.escapeCatchObject(ExceptionObjectVar);
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction
-WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst, BasicBlock *NewBB) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- // It might be interesting to track whether or not we are inside a catch
- // function, but that might make the algorithm more brittle than it needs
- // to be.
-
- // The end catch call can occur in one of two places: either in a
- // landingpad block that is part of the catch handlers exception mechanism,
- // or at the end of the catch block. However, a catch-all handler may call
- // end catch from the original landing pad. If the call occurs in a nested
- // landing pad block, we must skip it and continue so that the landing pad
- // gets cloned.
- auto *ParentBB = IntrinCall->getParent();
- if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB))
- return CloningDirector::SkipInstruction;
-
- // If an end catch occurs anywhere else we want to terminate the handler
- // with a return to the code that follows the endcatch call. If the
- // next instruction is not an unconditional branch, we need to split the
- // block to provide a clear target for the return instruction.
- BasicBlock *ContinueBB;
- auto Next = std::next(BasicBlock::const_iterator(IntrinCall));
- const BranchInst *Branch = dyn_cast<BranchInst>(Next);
- if (!Branch || !Branch->isUnconditional()) {
- // We're interrupting the cloning process at this location, so the
- // const_cast we're doing here will not cause a problem.
- ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB),
- const_cast<Instruction *>(cast<Instruction>(Next)));
- } else {
- ContinueBB = Branch->getSuccessor(0);
+ // Invert the map from BB to colors to color to BBs.
+ for (BasicBlock &BB : F) {
+ ColorVector &Colors = BlockColors[&BB];
+ for (BasicBlock *Color : Colors)
+ FuncletBlocks[Color].push_back(&BB);
}
-
- ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB);
- ReturnTargets.push_back(ContinueBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector)
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- else
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
}
-CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr(
- ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) {
- // If this indirect branch is not part of a landing pad block, just clone it.
- const BasicBlock *ParentBB = IBr->getParent();
- if (!ParentBB->isLandingPad())
- return CloningDirector::CloneInstruction;
-
- // If it is part of a landing pad, we want to filter out target blocks
- // that are not part of the handler we are outlining.
- const LandingPadInst *LPad = ParentBB->getLandingPadInst();
-
- // Save this correlation for later processing.
- NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad;
-
- // We should only get here for landing pads that have already been outlined.
- assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>()));
-
- // Copy the indirectbr, but only include targets that were previously
- // identified as EH blocks and are dominated by the nested landing pad.
- SetVector<const BasicBlock *> ReturnTargets;
- for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) {
- auto *TargetBB = IBr->getDestination(I);
- if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) &&
- DT->dominates(ParentBB, TargetBB)) {
- DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n");
- ReturnTargets.insert(TargetBB);
- }
+void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ for (const BasicBlock &BB : *Fn) {
+ const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
+ if (!CatchRet)
+ continue;
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = CatchRet->getParentPad();
+ const BasicBlock *Color;
+ if (isa<ConstantTokenNone>(ParentPad))
+ Color = &Fn->getEntryBlock();
+ else
+ Color = cast<Instruction>(ParentPad)->getParent();
+ // Record the catchret successor's funclet membership.
+ FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
}
- IndirectBrInst *NewBranch =
- IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()),
- ReturnTargets.size(), NewBB);
- for (auto *Target : ReturnTargets)
- NewBranch->addDestination(const_cast<BasicBlock*>(Target));
-
- // The operands and targets of the branch instruction are remapped later
- // because it is a terminator. Tell the cloning code to clone the
- // blocks we just added to the target list.
- return CloningDirector::CloneSuccessors;
}
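In IR terms the recorded color is the parent pad's block, or the entry block when the parent pad token is `none`. A small illustration (not taken from the patch):

//   dispatch:
//     %cs = catchswitch within none [label %handler] unwind to caller
//   handler:
//     %cp = catchpad within %cs [...]
//     catchret from %cp to label %try.cont
//   ; %try.cont is mapped to the entry block's color because the
//   ; catchswitch's parent pad is 'none'.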
-CloningDirector::CloningAction
-WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
- const InvokeInst *Invoke, BasicBlock *NewBB) {
- return CloningDirector::CloneInstruction;
-}
+void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+ // Strip PHI nodes off of EH pads.
+ SmallVector<PHINode *, 16> PHINodes;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ if (!BB->isEHPad())
+ continue;
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = &*BI++;
+ auto *PN = dyn_cast<PHINode>(I);
+ // Stop at the first non-PHI.
+ if (!PN)
+ break;
-CloningDirector::CloningAction
-WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
- const ResumeInst *Resume, BasicBlock *NewBB) {
- // Resume instructions shouldn't be reachable from catch handlers.
- // We still need to handle it, but it will be pruned.
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
-}
+ AllocaInst *SpillSlot = insertPHILoads(PN, F);
+ if (SpillSlot)
+ insertPHIStores(PN, SpillSlot);
-CloningDirector::CloningAction
-WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare, BasicBlock *NewBB) {
- const IntrinsicInst *IntrinCall = nullptr;
- if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0));
- } else if (match(Compare->getOperand(1),
- m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1));
- }
- if (IntrinCall) {
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector->stripPointerCasts()) {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
- } else {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 0);
+ PHINodes.push_back(PN);
}
- return CloningDirector::SkipInstruction;
}
- return CloningDirector::CloneInstruction;
-}
-CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad(
- ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
- // The MS runtime will terminate the process if an exception occurs in a
- // cleanup handler, so we shouldn't encounter landing pads in the actual
- // cleanup code, but they may appear in catch blocks. Depending on where
- // we started cloning we may see one, but it will get dropped during dead
- // block pruning.
- Instruction *NewInst = new UnreachableInst(NewBB->getContext());
- VMap[LPad] = NewInst;
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(NewInst);
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Cleanup code may flow into catch blocks or the catch block may be part
- // of a branch that will be optimized away. We'll insert a return
- // instruction now, but it may be pruned before the cloning process is
- // complete.
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Cleanup handlers nested within catch handlers may begin with a call to
- // eh.endcatch. We can just ignore that instruction.
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // If we encounter a selector comparison while cloning a cleanup handler,
- // we want to stop cloning immediately. Anything after the dispatch
- // will be outlined into a different handler.
- BasicBlock *CatchHandler;
- Constant *Selector;
- BasicBlock *NextBB;
- if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
- CatchHandler, Selector, NextBB)) {
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
- return CloningDirector::StopCloningBB;
- }
- // If eh.typeid.for is called for any other reason, it can be ignored.
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr(
- ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) {
- // No special handling is required for cleanup cloning.
- return CloningDirector::CloneInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
- ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
- // All invokes in cleanup handlers can be replaced with calls.
- SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
- // Insert a normal call instruction...
- CallInst *NewCall =
- CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
- Invoke->getName(), NewBB);
- NewCall->setCallingConv(Invoke->getCallingConv());
- NewCall->setAttributes(Invoke->getAttributes());
- NewCall->setDebugLoc(Invoke->getDebugLoc());
- VMap[Invoke] = NewCall;
-
- // Remap the operands.
- llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
-
- // Insert an unconditional branch to the normal destination.
- BranchInst::Create(Invoke->getNormalDest(), NewBB);
-
- // The unwind destination won't be cloned into the new function, so
- // we don't need to clean up its phi nodes.
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block.
- return CloningDirector::CloneSuccessors;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
- ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction
-WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare, BasicBlock *NewBB) {
- if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) ||
- match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
- return CloningDirector::SkipInstruction;
+ for (auto *PN : PHINodes) {
+ // This PHI may still have lingering uses from other EH PHIs that are also
+ // being removed.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
}
- return CloningDirector::CloneInstruction;
-}
-
-WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
- Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo)
- : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
- BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
-
- // New allocas should be inserted in the entry block, but after the parent FP
- // is established if it is an instruction.
- Instruction *InsertPoint = EntryBB->getFirstInsertionPt();
- if (auto *FPInst = dyn_cast<Instruction>(ParentFP))
- InsertPoint = FPInst->getNextNode();
- Builder.SetInsertPoint(EntryBB, InsertPoint);
}
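insertPHILoads/insertPHIStores demote each such PHI to a stack slot, with stores in the predecessors and loads at the uses, since values cannot merge via PHIs at a funclet entry. A rough before/after sketch (assumed shapes, not from the patch):

// Before:
//   ehpad:
//     %v  = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
//     %cp = cleanuppad within none []
// After (conceptually): %v gets a static alloca %slot in the entry block,
// %pred1/%pred2 store %a/%b into %slot, and uses of %v are rewritten to
// load from %slot inside the funclet.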
-Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
- // If we're asked to materialize a static alloca, we temporarily create an
- // alloca in the outlined function and add this to the FrameVarInfo map. When
- // all the outlining is complete, we'll replace these temporary allocas with
- // calls to llvm.localrecover.
- if (auto *AV = dyn_cast<AllocaInst>(V)) {
- assert(AV->isStaticAlloca() &&
- "cannot materialize un-demoted dynamic alloca");
- AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
- Builder.Insert(NewAlloca, AV->getName());
- FrameVarInfo[AV].push_back(NewAlloca);
- return NewAlloca;
- }
-
- if (isa<Instruction>(V) || isa<Argument>(V)) {
- Function *Parent = isa<Instruction>(V)
- ? cast<Instruction>(V)->getParent()->getParent()
- : cast<Argument>(V)->getParent();
- errs()
- << "Failed to demote instruction used in exception handler of function "
- << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n";
- errs() << " " << *V << '\n';
- report_fatal_error("WinEHPrepare failed to demote instruction");
- }
-
- // Don't materialize other values.
- return nullptr;
-}
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+ // We need to clone all blocks which belong to multiple funclets. Values are
+ // remapped throughout the funclet to propagate both the new instructions
+ // *and* the new basic blocks themselves.
+ for (auto &Funclets : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclets.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+
+ std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+ ValueToValueMapTy VMap;
+ for (BasicBlock *BB : BlocksInFunclet) {
+ ColorVector &ColorsForBB = BlockColors[BB];
+ // We don't need to do anything if the block is monochromatic.
+ size_t NumColorsForBB = ColorsForBB.size();
+ if (NumColorsForBB == 1)
+ continue;
-void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
- // Catch parameter objects have to live in the parent frame. When we see a use
- // of a catch parameter, add a sentinel to the multimap to indicate that it's
- // used from another handler. This will prevent us from trying to sink the
- // alloca into the handler and ensure that the catch parameter is present in
- // the call to llvm.localescape.
- FrameVarInfo[V].push_back(getCatchObjectSentinel());
-}
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Cloning block \'" << BB->getName()
+ << "\' for funclet \'" << FuncletPadBB->getName()
+ << "\'.\n");
-// This function maps the catch and cleanup handlers that are reachable from the
-// specified landing pad. The landing pad sequence will have this basic shape:
-//
-// <cleanup handler>
-// <selector comparison>
-// <catch handler>
-// <cleanup handler>
-// <selector comparison>
-// <catch handler>
-// <cleanup handler>
-// ...
-//
-// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
-// any arbitrary control flow, but all paths through the cleanup code must
-// eventually reach the next selector comparison and no path can skip to a
- // different selector comparison, though some paths may terminate abnormally.
-// Therefore, we will use a depth first search from the start of any given
-// cleanup block and stop searching when we find the next selector comparison.
-//
-// If the landingpad instruction does not have a catch clause, we will assume
-// that any instructions other than selector comparisons and catch handlers can
-// be ignored. In practice, these will only be the boilerplate instructions.
-//
-// The catch handlers may also have any control structure, but we are only
-// interested in the start of the catch handlers, so we don't need to actually
-// follow the flow of the catch handlers. The start of the catch handlers can
-// be located from the compare instructions, but they can be skipped in the
-// flow by following the contrary branch.
-void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
- LandingPadActions &Actions) {
- unsigned int NumClauses = LPad->getNumClauses();
- unsigned int HandlersFound = 0;
- BasicBlock *BB = LPad->getParent();
-
- DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
-
- if (NumClauses == 0) {
- findCleanupHandlers(Actions, BB, nullptr);
- return;
- }
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB =
+ CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+ // Insert the clone immediately after the original to ensure determinism
+ // and to keep the same relative ordering of any funclet's blocks.
+ CBB->insertInto(&F, BB->getNextNode());
- VisitedBlockSet VisitedBlocks;
+ // Add basic block mapping.
+ VMap[BB] = CBB;
- while (HandlersFound != NumClauses) {
- BasicBlock *NextBB = nullptr;
+ // Record delta operations that we need to perform to our color mappings.
+ Orig2Clone.emplace_back(BB, CBB);
+ }
- // Skip over filter clauses.
- if (LPad->isFilter(HandlersFound)) {
- ++HandlersFound;
+ // If nothing was cloned, we're done cloning in this funclet.
+ if (Orig2Clone.empty())
continue;
+
+ // Update our color mappings to reflect that one block has lost a color and
+ // another has gained a color.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ BlocksInFunclet.push_back(NewBlock);
+ ColorVector &NewColors = BlockColors[NewBlock];
+ assert(NewColors.empty() && "A new block should only have one color!");
+ NewColors.push_back(FuncletPadBB);
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << FuncletPadBB->getName()
+ << "\' to block \'" << NewBlock->getName()
+ << "\'.\n");
+
+ BlocksInFunclet.erase(
+ std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
+ BlocksInFunclet.end());
+ ColorVector &OldColors = BlockColors[OldBlock];
+ OldColors.erase(
+ std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
+ OldColors.end());
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Removed color \'" << FuncletPadBB->getName()
+ << "\' from block \'" << OldBlock->getName()
+ << "\'.\n");
}
- // See if the clause we're looking for is a catch-all.
- // If so, the catch begins immediately.
- Constant *ExpectedSelector =
- LPad->getClause(HandlersFound)->stripPointerCasts();
- if (isa<ConstantPointerNull>(ExpectedSelector)) {
- // The catch all must occur last.
- assert(HandlersFound == NumClauses - 1);
-
- // There can be additional selector dispatches in the call chain that we
- // need to ignore.
- BasicBlock *CatchBlock = nullptr;
- Constant *Selector;
- while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
- DEBUG(dbgs() << " Found extra catch dispatch in block "
- << CatchBlock->getName() << "\n");
- BB = NextBB;
+ // Loop over all of the instructions in this funclet, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (BasicBlock *BB : BlocksInFunclet)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &I : *BB)
+ RemapInstruction(&I, VMap,
+ RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+
+ auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+ unsigned NumPreds = PN->getNumIncomingValues();
+ for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+ ++PredIdx) {
+ BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+ ColorVector &IncomingColors = BlockColors[IncomingBlock];
+ bool BlockInFunclet = IncomingColors.size() == 1 &&
+ IncomingColors.front() == FuncletPadBB;
+ if (IsForOldBlock != BlockInFunclet)
+ continue;
+ PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+ // Revisit the next entry.
+ --PredIdx;
+ --PredEnd;
}
-
- // Add the catch handler to the action list.
- CatchHandler *Action = nullptr;
- if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
- // If the CatchHandlerMap already has an entry for this BB, re-use it.
- Action = CatchHandlerMap[BB];
- assert(Action->getSelector() == ExpectedSelector);
- } else {
- // We don't expect a selector dispatch, but there may be a call to
- // llvm.eh.begincatch, which separates catch handling code from
- // cleanup code in the same control flow. This call looks for the
- // begincatch intrinsic.
- Action = findCatchHandler(BB, NextBB, VisitedBlocks);
- if (Action) {
- // For C++ EH, check if there is any interesting cleanup code before
- // we begin the catch. This is important because cleanups cannot
- // rethrow exceptions but code called from catches can. For SEH, it
- // isn't important if some finally code before a catch-all is executed
- // out of line or after recovering from the exception.
- if (Personality == EHPersonality::MSVC_CXX)
- findCleanupHandlers(Actions, BB, BB);
- } else {
- // If an action was not found, it means that the control flows
- // directly into the catch-all handler and there is no cleanup code.
- // That's an expected situation and we must create a catch action.
- // Since this is a catch-all handler, the selector won't actually
- // appear in the code anywhere. ExpectedSelector here is the constant
- // null ptr that we got from the landing pad instruction.
- Action = new CatchHandler(BB, ExpectedSelector, nullptr);
- CatchHandlerMap[BB] = Action;
- }
+ };
+
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (Instruction &OldI : *OldBlock) {
+ auto *OldPN = dyn_cast<PHINode>(&OldI);
+ if (!OldPN)
+ break;
+ UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
}
- Actions.insertCatchHandler(Action);
- DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
- ++HandlersFound;
-
- // Once we reach a catch-all, don't expect to hit a resume instruction.
- BB = nullptr;
- break;
- }
-
- CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
- assert(CatchAction);
-
- // See if there is any interesting code executed before the dispatch.
- findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
-
- // When the source program contains multiple nested try blocks the catch
- // handlers can get strung together in such a way that we can encounter
- // a dispatch for a selector that we've already had a handler for.
- if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) {
- ++HandlersFound;
-
- // Add the catch handler to the action list.
- DEBUG(dbgs() << " Found catch dispatch in block "
- << CatchAction->getStartBlock()->getName() << "\n");
- Actions.insertCatchHandler(CatchAction);
- } else {
- // Under some circumstances optimized IR will flow unconditionally into a
- // handler block without checking the selector. This can only happen if
- // the landing pad has a catch-all handler and the handler for the
- // preceding catch clause is identical to the catch-all handler
- // (typically an empty catch). In this case, the handler must be shared
- // by all remaining clauses.
- if (isa<ConstantPointerNull>(
- CatchAction->getSelector()->stripPointerCasts())) {
- DEBUG(dbgs() << " Applying early catch-all handler in block "
- << CatchAction->getStartBlock()->getName()
- << " to all remaining clauses.\n");
- Actions.insertCatchHandler(CatchAction);
- return;
+ for (Instruction &NewI : *NewBlock) {
+ auto *NewPN = dyn_cast<PHINode>(&NewI);
+ if (!NewPN)
+ break;
+ UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
}
-
- DEBUG(dbgs() << " Found extra catch dispatch in block "
- << CatchAction->getStartBlock()->getName() << "\n");
}
- // Move on to the block after the catch handler.
- BB = NextBB;
- }
-
- // If we didn't wind up in a catch-all, see if there is any interesting code
- // executed before the resume.
- findCleanupHandlers(Actions, BB, BB);
-
- // It's possible that some optimization moved code into a landingpad that
- // wasn't previously being used for cleanup. If that happens, we need to
- // execute that extra code from a cleanup handler.
- if (Actions.includesCleanup() && !LPad->isCleanup())
- LPad->setCleanup(true);
-}
-
-// This function searches starting with the input block for the next
-// block that terminates with a branch whose condition is based on a selector
-// comparison. This may be the input block. See the mapLandingPadBlocks
-// comments for a discussion of control flow assumptions.
-//
-CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
- BasicBlock *&NextBB,
- VisitedBlockSet &VisitedBlocks) {
- // See if we've already found a catch handler; if so, use it.
- // Call count() first to avoid creating a null entry for blocks
- // we haven't seen before.
- if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
- CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
- NextBB = Action->getNextBB();
- return Action;
- }
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
+ // those PHI nodes for NewBlock now.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (BasicBlock *SuccBB : successors(NewBlock)) {
+ for (Instruction &SuccI : *SuccBB) {
+ auto *SuccPN = dyn_cast<PHINode>(&SuccI);
+ if (!SuccPN)
+ break;
+
+ // Ok, we have a PHI node. Figure out what the incoming value was for
+ // the OldBlock.
+ int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+ if (OldBlockIdx == -1)
+ break;
+ Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+
+ // Remap the value if necessary.
+ if (auto *Inst = dyn_cast<Instruction>(IV)) {
+ ValueToValueMapTy::iterator I = VMap.find(Inst);
+ if (I != VMap.end())
+ IV = I->second;
+ }
- // VisitedBlocks applies only to the current search. We still
- // need to consider blocks that we've visited while mapping other
- // landing pads.
- VisitedBlocks.insert(BB);
-
- BasicBlock *CatchBlock = nullptr;
- Constant *Selector = nullptr;
-
- // If this is the first time we've visited this block from any landing pad
- // look to see if it is a selector dispatch block.
- if (!CatchHandlerMap.count(BB)) {
- if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
- CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
- CatchHandlerMap[BB] = Action;
- return Action;
- }
- // If we encounter a block containing an llvm.eh.begincatch before we
- // find a selector dispatch block, the handler is assumed to be
- // reached unconditionally. This happens for catch-all blocks, but
- // it can also happen for other catch handlers that have been combined
- // with the catch-all handler during optimization.
- if (isCatchBlock(BB)) {
- PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext());
- Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy);
- CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr);
- CatchHandlerMap[BB] = Action;
- return Action;
+ SuccPN->addIncoming(IV, NewBlock);
+ }
+ }
}
- }
- // Visit each successor, looking for the dispatch.
- // FIXME: We expect to find the dispatch quickly, so this will probably
- // work better as a breadth first search.
- for (BasicBlock *Succ : successors(BB)) {
- if (VisitedBlocks.count(Succ))
- continue;
+ for (ValueToValueMapTy::value_type VT : VMap) {
+ // If there were values defined in BB that are used outside the funclet,
+ // then we now have to update all uses of the value to use either the
+ // original value, the cloned value, or some PHI-derived value. This can
+ // require arbitrary PHI insertion, which we are prepared to do; clean
+ // these up now.
+ SmallVector<Use *, 16> UsesToRename;
- CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
- if (Action)
- return Action;
- }
- return nullptr;
-}
-
-// These are helper functions to combine repeated code from findCleanupHandlers.
-static void createCleanupHandler(LandingPadActions &Actions,
- CleanupHandlerMapTy &CleanupHandlerMap,
- BasicBlock *BB) {
- CleanupHandler *Action = new CleanupHandler(BB);
- CleanupHandlerMap[BB] = Action;
- Actions.insertCleanupHandler(Action);
- DEBUG(dbgs() << " Found cleanup code in block "
- << Action->getStartBlock()->getName() << "\n");
-}
-
-static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
- Instruction *MaybeCall) {
- // Look for finally blocks that Clang has already outlined for us.
- // %fp = call i8* @llvm.localaddress()
- // call void @"fin$parent"(iN 1, i8* %fp)
- if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
- MaybeCall = MaybeCall->getNextNode();
- CallSite FinallyCall(MaybeCall);
- if (!FinallyCall || FinallyCall.arg_size() != 2)
- return CallSite();
- if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
- return CallSite();
- if (!isLocalAddressCall(FinallyCall.getArgument(1)))
- return CallSite();
- return FinallyCall;
-}
-
-static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
- // Skip single ubr blocks.
- while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
- auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
- if (Br && Br->isUnconditional())
- BB = Br->getSuccessor(0);
- else
- return BB;
- }
- return BB;
-}
-
-// This function searches starting with the input block for the next block that
-// contains code that is not part of a catch handler and would not be eliminated
-// during handler outlining.
-//
-void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
- BasicBlock *StartBB, BasicBlock *EndBB) {
- // Here we will skip over the following:
- //
- // landing pad prolog:
- //
- // Unconditional branches
- //
- // Selector dispatch
- //
- // Resume pattern
- //
- // Anything else marks the start of an interesting block
-
- BasicBlock *BB = StartBB;
- // Anything other than an unconditional branch will kick us out of this loop
- // one way or another.
- while (BB) {
- BB = followSingleUnconditionalBranches(BB);
- // If we've already scanned this block, don't scan it again. If it is
- // a cleanup block, there will be an action in the CleanupHandlerMap.
- // If we've scanned it and it is not a cleanup block, there will be a
- // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
- // be no entry in the CleanupHandlerMap. We must call count() first to
- // avoid creating a null entry for blocks we haven't scanned.
- if (CleanupHandlerMap.count(BB)) {
- if (auto *Action = CleanupHandlerMap[BB]) {
- Actions.insertCleanupHandler(Action);
- DEBUG(dbgs() << " Found cleanup code in block "
- << Action->getStartBlock()->getName() << "\n");
- // FIXME: This cleanup might chain into another, and we need to discover
- // that.
- return;
- } else {
- // Here we handle the case where the cleanup handler map contains a
- // value for this block but the value is a nullptr. This means that
- // we have previously analyzed the block and determined that it did
- // not contain any cleanup code. Based on the earlier analysis, we
- // know the block must end in either an unconditional branch, a
- // resume or a conditional branch that is predicated on a comparison
- // with a selector. Either the resume or the selector dispatch
- // would terminate the search for cleanup code, so the unconditional
- // branch is the only case for which we might need to continue
- // searching.
- BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
- if (SuccBB == BB || SuccBB == EndBB)
- return;
- BB = SuccBB;
+ auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+ if (!OldI)
continue;
+ auto *NewI = cast<Instruction>(VT.second);
+ // Scan all uses of this instruction to see if it is used outside of its
+ // funclet, and if so, record them in UsesToRename.
+ for (Use &U : OldI->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = UserI->getParent();
+ ColorVector &ColorsForUserBB = BlockColors[UserBB];
+ assert(!ColorsForUserBB.empty());
+ if (ColorsForUserBB.size() > 1 ||
+ *ColorsForUserBB.begin() != FuncletPadBB)
+ UsesToRename.push_back(&U);
}
- }
- // Create an entry in the cleanup handler map for this block. Initially
- // we create an entry that says this isn't a cleanup block. If we find
- // cleanup code, the caller will replace this entry.
- CleanupHandlerMap[BB] = nullptr;
+ // If there are no uses outside the funclet, we're done with this
+ // instruction.
+ if (UsesToRename.empty())
+ continue;
- TerminatorInst *Terminator = BB->getTerminator();
+ // We found a use of OldI outside of the funclet. Rename all uses of OldI
+ // that are outside its funclet to be uses of the appropriate PHI node
+ // etc.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+ SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+ SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
- // Landing pad blocks have extra instructions we need to accept.
- LandingPadMap *LPadMap = nullptr;
- if (BB->isLandingPad()) {
- LandingPadInst *LPad = BB->getLandingPadInst();
- LPadMap = &LPadMaps[LPad];
- if (!LPadMap->isInitialized())
- LPadMap->mapLandingPad(LPad);
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
}
+ }
+}
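For reference, the use-renaming above follows the standard SSAUpdater pattern: both the original instruction and its clone are registered as available definitions, and every use collected outside the funclet is rewritten, with PHIs inserted where control flow merges. A minimal sketch under those assumptions (the helper name is hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

// Hypothetical helper: rewrite every collected use of OldI that lives outside
// its funclet so it sees OldI, its clone NewI, or a newly inserted PHI.
static void rewriteCrossFuncletUses(Instruction *OldI, Instruction *NewI,
                                    SmallVectorImpl<Use *> &UsesToRename) {
  SSAUpdater SSAUpdate;
  SSAUpdate.Initialize(OldI->getType(), OldI->getName());
  // Both the original and the cloned definition are legal sources.
  SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
  SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
  while (!UsesToRename.empty())
    SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
}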
- // Look for the bare resume pattern:
- // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
- // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
- // resume { i8*, i32 } %lpad.val2
- if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
- InsertValueInst *Insert1 = nullptr;
- InsertValueInst *Insert2 = nullptr;
- Value *ResumeVal = Resume->getOperand(0);
- // If the resume value isn't a phi or landingpad value, it should be a
- // series of insertions. Identify them so we can avoid them when scanning
- // for cleanups.
- if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
- Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
- if (!Insert2)
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
- if (!Insert1)
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+ // Remove implausible terminators and replace them with UnreachableInst.
+ for (auto &Funclet : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclet.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+ Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+ auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+ auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+ auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+ for (BasicBlock *BB : BlocksInFunclet) {
+ for (Instruction &I : *BB) {
+ CallSite CS(&I);
+ if (!CS)
continue;
- if (!Inst->hasOneUse() ||
- (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- }
- return;
- }
- BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- if (Branch && Branch->isConditional()) {
- // Look for the selector dispatch.
- // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
- // %matches = icmp eq i32 %sel, %2
- // br i1 %matches, label %catch14, label %eh.resume
- CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
- if (!Compare || !Compare->isEquality())
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Compare || Inst == Branch)
+ Value *FuncletBundleOperand = nullptr;
+ if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+ FuncletBundleOperand = BU->Inputs.front();
+
+ if (FuncletBundleOperand == FuncletPad)
continue;
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+
+ // Skip call sites which are nounwind intrinsics.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
continue;
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- // The selector dispatch block should always terminate our search.
- assert(BB == EndBB);
- return;
- }
- if (isAsynchronousEHPersonality(Personality)) {
- // If this is a landingpad block, split the block at the first non-landing
- // pad instruction.
- Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
- if (LPadMap) {
- while (MaybeCall != BB->getTerminator() &&
- LPadMap->isLandingPadSpecificInst(MaybeCall))
- MaybeCall = MaybeCall->getNextNode();
+ // This call site was not part of this funclet; remove it.
+ if (CS.isInvoke()) {
+ // Remove the unwind edge if it was an invoke.
+ removeUnwindEdge(BB);
+ // Get a pointer to the new call.
+ BasicBlock::iterator CallI =
+ std::prev(BB->getTerminator()->getIterator());
+ auto *CI = cast<CallInst>(&*CallI);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ } else {
+ changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+ }
+
+ // There are no more instructions in the block (except for unreachable),
+ // so we are done.
+ break;
}
- // Look for outlined finally calls on x64, since those happen to match the
- // prototype provided by the runtime.
- if (TheTriple.getArch() == Triple::x86_64) {
- if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
- Function *Fin = FinallyCall.getCalledFunction();
- assert(Fin && "outlined finally call should be direct");
- auto *Action = new CleanupHandler(BB);
- Action->setHandlerBlockOrFunc(Fin);
- Actions.insertCleanupHandler(Action);
- CleanupHandlerMap[BB] = Action;
- DEBUG(dbgs() << " Found frontend-outlined finally call to "
- << Fin->getName() << " in block "
- << Action->getStartBlock()->getName() << "\n");
-
- // Split the block if there were more interesting instructions and
- // look for finally calls in the normal successor block.
- BasicBlock *SuccBB = BB;
- if (FinallyCall.getInstruction() != BB->getTerminator() &&
- FinallyCall.getInstruction()->getNextNode() !=
- BB->getTerminator()) {
- SuccBB =
- SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
- } else {
- if (FinallyCall.isInvoke()) {
- SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())
- ->getNormalDest();
- } else {
- SuccBB = BB->getUniqueSuccessor();
- assert(SuccBB &&
- "splitOutlinedFinallyCalls didn't insert a branch");
- }
- }
- BB = SuccBB;
- if (BB == EndBB)
- return;
- continue;
+ TerminatorInst *TI = BB->getTerminator();
+ // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
+ bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+ // The token consumed by a CatchReturnInst must match the funclet token.
+ bool IsUnreachableCatchret = false;
+ if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+ IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+ // The token consumed by a CleanupReturnInst must match the funclet token.
+ bool IsUnreachableCleanupret = false;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+ IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+ if (IsUnreachableRet || IsUnreachableCatchret ||
+ IsUnreachableCleanupret) {
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+ } else if (isa<InvokeInst>(TI)) {
+ if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+ // Invokes within a cleanuppad for the MSVC++ personality never
+ // transfer control to their unwind edge: the personality will
+ // terminate the program.
+ removeUnwindEdge(BB);
}
}
}
+ }
+}
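The membership test at the top of the loop keys off the "funclet" operand bundle that calls inside a funclet must carry. A standalone version of that check might look like the following sketch (the helper name is hypothetical; the bundle query mirrors the one above):

#include "llvm/IR/CallSite.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Hypothetical helper: true if CS carries a "funclet" operand bundle whose
// token is exactly FuncletPad, i.e. the call plausibly belongs to that funclet.
static bool callSiteIsInFunclet(CallSite CS, const Value *FuncletPad) {
  if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_funclet))
    return Bundle->Inputs.front() == FuncletPad;
  // No funclet bundle: the call is not pinned to any particular funclet.
  return false;
}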
- // Anything else is either a catch block or interesting cleanup code.
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- // Unconditional branches fall through to this loop.
- if (Inst == Branch)
- continue;
- // If this is a catch block, there is no cleanup code to be found.
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
- return;
- // If this a nested landing pad, it may contain an endcatch call.
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
- return;
- // Anything else makes this interesting cleanup code.
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
-
- // Only unconditional branches in empty blocks should get this far.
- assert(Branch && Branch->isUnconditional());
- if (BB == EndBB)
- return;
- BB = Branch->getSuccessor(0);
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+ // Clean up some of the mess we made by removing useless PHI nodes, trivial
+ // branches, etc.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ SimplifyInstructionsInBlock(BB);
+ ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
+ MergeBlockIntoPredecessor(BB);
}
+
+ // We might have some unreachable blocks after cleaning up some impossible
+ // control flow.
+ removeUnreachableBlocks(F);
}
-// This is a public function, declared in WinEHFuncInfo.h and is also
-// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
-void llvm::parseEHActions(
- const IntrinsicInst *II,
- SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
- assert(II->getIntrinsicID() == Intrinsic::eh_actions &&
- "attempted to parse non eh.actions intrinsic");
- for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
- uint64_t ActionKind =
- cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
- if (ActionKind == /*catch=*/1) {
- auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
- ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
- int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
- Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
- I += 4;
- auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
- /*NextBB=*/nullptr);
- CH->setHandlerBlockOrFunc(Handler);
- CH->setExceptionVarIndex(EHObjIndexVal);
- Actions.push_back(std::move(CH));
- } else if (ActionKind == 0) {
- Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
- I += 2;
- auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
- CH->setHandlerBlockOrFunc(Handler);
- Actions.push_back(std::move(CH));
- } else {
- llvm_unreachable("Expected either a catch or cleanup handler!");
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+ // Recolor the CFG to verify that all is well.
+ for (BasicBlock &BB : F) {
+ size_t NumColors = BlockColors[&BB].size();
+ assert(NumColors == 1 && "Expected monochromatic BB!");
+ if (NumColors == 0)
+ report_fatal_error("Uncolored BB!");
+ if (NumColors > 1)
+ report_fatal_error("Multicolor BB!");
+ if (!DisableDemotion) {
+ bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
+ assert(!EHPadHasPHI && "EH Pad still has a PHI!");
+ if (EHPadHasPHI)
+ report_fatal_error("EH Pad still has a PHI!");
}
}
- std::reverse(Actions.begin(), Actions.end());
}
-namespace {
-struct WinEHNumbering {
- WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo),
- CurrentBaseState(-1), NextState(0) {}
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+ // Remove unreachable blocks. It is not valuable to assign them a color and
+ // their existence can trick us into thinking values are alive when they are
+ // not.
+ removeUnreachableBlocks(F);
- WinEHFuncInfo &FuncInfo;
- int CurrentBaseState;
- int NextState;
+ // Determine which blocks are reachable from which funclet entries.
+ colorFunclets(F);
- SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
- SmallPtrSet<const Function *, 4> VisitedHandlers;
+ cloneCommonBlocks(F);
- int currentEHNumber() const {
- return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState();
- }
+ if (!DisableDemotion)
+ demotePHIsOnFunclets(F);
- void createUnwindMapEntry(int ToState, ActionHandler *AH);
- void createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers);
- void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS);
- void popUnmatchedActions(int FirstMismatch);
- void calculateStateNumbers(const Function &F);
- void findActionRootLPads(const Function &F);
-};
-}
+ if (!DisableCleanups) {
+ removeImplausibleInstructions(F);
-void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
- WinEHUnwindMapEntry UME;
- UME.ToState = ToState;
- if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
- UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
- else
- UME.Cleanup = nullptr;
- FuncInfo.UnwindMap.push_back(UME);
-}
-
-void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers) {
- // See if we already have an entry for this set of handlers.
- // This is using iterators rather than a range-based for loop because
- // if we find the entry we're looking for we'll need the iterator to erase it.
- int NumHandlers = Handlers.size();
- auto I = FuncInfo.TryBlockMap.begin();
- auto E = FuncInfo.TryBlockMap.end();
- for ( ; I != E; ++I) {
- auto &Entry = *I;
- if (Entry.HandlerArray.size() != (size_t)NumHandlers)
- continue;
- int N;
- for (N = 0; N < NumHandlers; ++N) {
- if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc())
- break; // breaks out of inner loop
- }
- // If all the handlers match, this is what we were looking for.
- if (N == NumHandlers) {
- break;
- }
- }
-
- // If we found an existing entry for this set of handlers, extend the range
- // but move the entry to the end of the map vector. The order of entries
- // in the map is critical to the way that the runtime finds handlers.
- // FIXME: Depending on what has happened with block ordering, this may
- // incorrectly combine entries that should remain separate.
- if (I != E) {
- // Copy the existing entry.
- WinEHTryBlockMapEntry Entry = *I;
- Entry.TryLow = std::min(TryLow, Entry.TryLow);
- Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
- assert(Entry.TryLow <= Entry.TryHigh);
- // Erase the old entry and add this one to the back.
- FuncInfo.TryBlockMap.erase(I);
- FuncInfo.TryBlockMap.push_back(Entry);
- return;
+ cleanupPreparedFunclets(F);
}
- // If we didn't find an entry, create a new one.
- WinEHTryBlockMapEntry TBME;
- TBME.TryLow = TryLow;
- TBME.TryHigh = TryHigh;
- assert(TBME.TryLow <= TBME.TryHigh);
- for (CatchHandler *CH : Handlers) {
- WinEHHandlerType HT;
- if (CH->getSelector()->isNullValue()) {
- HT.Adjectives = 0x40;
- HT.TypeDescriptor = nullptr;
- } else {
- auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
- // Selectors are always pointers to GlobalVariables with 'struct' type.
- // The struct has two fields, adjectives and a type descriptor.
- auto *CS = cast<ConstantStruct>(GV->getInitializer());
- HT.Adjectives =
- cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
- HT.TypeDescriptor =
- cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
- }
- HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
- HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
- TBME.HandlerArray.push_back(HT);
- }
- FuncInfo.TryBlockMap.push_back(TBME);
-}
+ verifyPreparedFunclets(F);
-static void print_name(const Value *V) {
-#ifndef NDEBUG
- if (!V) {
- DEBUG(dbgs() << "null");
- return;
- }
+ BlockColors.clear();
+ FuncletBlocks.clear();
- if (const auto *F = dyn_cast<Function>(V))
- DEBUG(dbgs() << F->getName());
- else
- DEBUG(V->dump());
-#endif
+ return true;
}
-void WinEHNumbering::processCallSite(
- MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS) {
- DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
- << ") for: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
-
- DEBUG(dbgs() << "HandlerStack: \n");
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(HandlerStack[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- DEBUG(dbgs() << "Actions: \n");
- for (int I = 0, E = Actions.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- int FirstMismatch = 0;
- for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
- ++FirstMismatch) {
- if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
- Actions[FirstMismatch]->getHandlerBlockOrFunc())
- break;
- }
-
- // Remove unmatched actions from the stack and process their EH states.
- popUnmatchedActions(FirstMismatch);
-
- DEBUG(dbgs() << "Pushing actions for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
-
- bool LastActionWasCatch = false;
- const LandingPadInst *LastRootLPad = nullptr;
- for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
- // We can reuse eh states when pushing two catches for the same invoke.
- bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
- auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
- // Various conditions can lead to a handler being popped from the
- // stack and re-pushed later. That shouldn't create a new state.
- // FIXME: Can code optimization lead to re-used handlers?
- if (FuncInfo.HandlerEnclosedState.count(Handler)) {
- // If we already assigned the state enclosed by this handler re-use it.
- Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+ BasicBlock *PHIBlock = PN->getParent();
+ AllocaInst *SpillSlot = nullptr;
+ Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+ if (!isa<TerminatorInst>(EHPad)) {
+ // If the EHPad isn't a terminator, then we can insert a load in this block
+ // that will dominate all uses.
+ SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ Twine(PN->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+ Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+ &*PHIBlock->getFirstInsertionPt());
+ PN->replaceAllUsesWith(V);
+ return SpillSlot;
+ }
+
+ // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+ // loads of the slot before every use.
+ DenseMap<BasicBlock *, Value *> Loads;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+ // Use is on an EH pad phi. Leave it alone; we'll insert loads and
+ // stores for it separately.
continue;
}
- const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler];
- if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
- DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n");
- Actions[I]->setEHState(currentEHNumber());
- } else {
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << ") with EH state " << NextState << "\n");
- createUnwindMapEntry(currentEHNumber(), Actions[I].get());
- DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
- Actions[I]->setEHState(NextState);
- NextState++;
- }
- HandlerStack.push_back(std::move(Actions[I]));
- LastActionWasCatch = CurrActionIsCatch;
- LastRootLPad = RootLPad;
+ replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
}
-
- // This is used to defer numbering states for a handler until after the
- // last time it appears in an invoke action list.
- if (CS.isInvoke()) {
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
- if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction()))
- continue;
- FuncInfo.LastInvokeVisited[Handler] = true;
- DEBUG(dbgs() << "Last invoke of ");
- print_name(Handler);
- DEBUG(dbgs() << " has been visited.\n");
- }
- }
-
- DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
+ return SpillSlot;
}
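The non-terminator case above is ordinary register-to-memory demotion; a minimal sketch under the same assumptions (hypothetical helper name, spill slot created in the entry block, reload placed at the pad's first insertion point):

#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical sketch: replace PN with a reload from a fresh stack slot.
// Callers must still arrange for stores into the slot on every incoming edge
// (as insertPHIStores does) before erasing PN.
static AllocaInst *demotePHIToReload(PHINode *PN, Function &F) {
  auto *Slot = new AllocaInst(PN->getType(), nullptr,
                              Twine(PN->getName(), ".spill"),
                              &F.getEntryBlock().front());
  auto *Reload = new LoadInst(Slot, Twine(PN->getName(), ".reload"),
                              &*PN->getParent()->getFirstInsertionPt());
  PN->replaceAllUsesWith(Reload);
  return Slot;
}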
-void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
- // Don't recurse while we are looping over the handler stack. Instead, defer
- // the numbering of the catch handlers until we are done popping.
- SmallVector<CatchHandler *, 4> PoppedCatches;
- for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
- std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
- if (isa<CatchHandler>(Handler.get()))
- PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
- }
+// TODO: improve store placement. Inserting at def is probably good, but need
+// to be careful not to introduce interfering stores (needs liveness analysis).
+// TODO: identify related phi nodes that can share spill slots, and share them
+// (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+ AllocaInst *SpillSlot) {
+ // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+ // stored to the spill slot by the end of the given Block.
+ SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
- int TryHigh = NextState - 1;
- int LastTryLowIdx = 0;
- for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
- CatchHandler *CH = PoppedCatches[I];
- DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
- if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
- int TryLow = CH->getEHState();
- auto Handlers =
- makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
- DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
- for (size_t J = 0; J < Handlers.size(); ++J) {
- DEBUG(dbgs() << ", ");
- print_name(Handlers[J]->getHandlerBlockOrFunc());
- }
- DEBUG(dbgs() << ")\n");
- createTryBlockMapEntry(TryLow, TryHigh, Handlers);
- LastTryLowIdx = I + 1;
- }
- }
+ Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
- for (CatchHandler *CH : PoppedCatches) {
- if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
- if (FuncInfo.LastInvokeVisited[F]) {
- DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
- print_name(F);
- DEBUG(dbgs() << '\n');
- FuncInfo.HandlerBaseState[F] = NextState;
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
- << ", null)\n");
- createUnwindMapEntry(currentEHNumber(), nullptr);
- ++NextState;
- calculateStateNumbers(*F);
+ while (!Worklist.empty()) {
+ BasicBlock *EHBlock;
+ Value *InVal;
+ std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+ PHINode *PN = dyn_cast<PHINode>(InVal);
+ if (PN && PN->getParent() == EHBlock) {
+ // The value is defined by another PHI we need to remove, with no room to
+ // insert a store after the PHI, so each predecessor needs to store its
+ // incoming value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ Value *PredVal = PN->getIncomingValue(i);
+
+ // Undef can safely be skipped.
+ if (isa<UndefValue>(PredVal))
+ continue;
+
+ insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
}
- else {
- DEBUG(dbgs() << "Deferring handling of ");
- print_name(F);
- DEBUG(dbgs() << " until last invoke visited.\n");
+ } else {
+ // We need to store InVal, which dominates EHBlock, but we can't put a store
+ // in EHBlock, so we need to put stores in each predecessor.
+ for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+ insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
}
}
- delete CH;
}
}
-void WinEHNumbering::calculateStateNumbers(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't renumber it.
+void WinEHPrepare::insertPHIStore(
+ BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
- int OldBaseState = CurrentBaseState;
- if (FuncInfo.HandlerBaseState.count(&F)) {
- CurrentBaseState = FuncInfo.HandlerBaseState[&F];
- }
-
- size_t SavedHandlerStackSize = HandlerStack.size();
-
- DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
- const auto *CI = dyn_cast<CallInst>(&I);
- if (!CI || CI->doesNotThrow())
- continue;
- processCallSite(None, CI);
- }
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- processCallSite(ActionList, II);
- ActionList.clear();
- FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
- DEBUG(dbgs() << "Assigning state " << currentEHNumber()
- << " to landing pad at " << LPI->getParent()->getName()
- << '\n');
+ if (PredBlock->isEHPad() &&
+ isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ // Pred is unsplittable, so we need to queue it on the worklist.
+ Worklist.push_back({PredBlock, PredVal});
+ return;
}
- // Pop any actions that were pushed on the stack for this function.
- popUnmatchedActions(SavedHandlerStackSize);
-
- DEBUG(dbgs() << "Assigning max state " << NextState - 1
- << " to " << F.getName() << '\n');
- FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
-
- CurrentBaseState = OldBaseState;
+ // Otherwise, insert the store at the end of the basic block.
+ new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
}
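The "unsplittable" test above boils down to a block whose first non-PHI instruction is also its terminator (for example a catchswitch block), leaving no room to place a store. A hypothetical predicate capturing that check:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: an EH pad with no room for ordinary instructions must
// be handled by queuing its own predecessors instead of storing into it.
static bool isUnsplittableEHPad(const BasicBlock *BB) {
  return BB->isEHPad() && isa<TerminatorInst>(BB->getFirstNonPHI());
}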
-// This function follows the same basic traversal as calculateStateNumbers
-// but it is necessary to identify the root landing pad associated
-// with each action before we start assigning state numbers.
-void WinEHNumbering::findActionRootLPads(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't revisit it.
-
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
-
- assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- for (int I = 0, E = ActionList.size(); I < E; ++I) {
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
- FuncInfo.LastInvoke[Handler] = II;
- // Don't replace the root landing pad if we previously saw this
- // handler in a different function.
- if (FuncInfo.RootLPad.count(Handler) &&
- FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
- continue;
- DEBUG(dbgs() << "Setting root lpad for ");
- print_name(Handler);
- DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
- FuncInfo.RootLPad[Handler] = LPI;
- }
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads,
+ Function &F) {
+ // Lazily create the spill slot.
+ if (!SpillSlot)
+ SpillSlot = new AllocaInst(V->getType(), nullptr,
+ Twine(V->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+ // If this is a PHI node, we can't insert a load of the value before
+ // the use. Instead insert the load in the predecessor block
+ // corresponding to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this
+ // PHI node, we cannot have multiple loads. The problem is that
+ // the resulting PHI node will have multiple values (from each load)
+ // coming in from the same block, which is illegal SSA form.
+ // For this reason, we keep track of and reuse loads we insert.
+ BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+ if (auto *CatchRet =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ // Putting a load above a catchret and use on the phi would still leave
+ // a cross-funclet def/use. We need to split the edge, change the
+ // catchret to target the new block, and put the load there.
+ BasicBlock *PHIBlock = UsingInst->getParent();
+ BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+ // SplitEdge gives us:
+ // IncomingBlock:
+ // ...
+ // br label %NewBlock
+ // NewBlock:
+ // catchret label %PHIBlock
+ // But we need:
+ // IncomingBlock:
+ // ...
+ // catchret label %NewBlock
+ // NewBlock:
+ // br label %PHIBlock
+ // So move the terminators to each others' blocks and swap their
+ // successors.
+ BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+ Goto->removeFromParent();
+ CatchRet->removeFromParent();
+ IncomingBlock->getInstList().push_back(CatchRet);
+ NewBlock->getInstList().push_back(Goto);
+ Goto->setSuccessor(0, PHIBlock);
+ CatchRet->setSuccessor(NewBlock);
+ // Update the color mapping for the newly split edge.
+ ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+ BlockColors[NewBlock] = ColorsForPHIBlock;
+ for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+ FuncletBlocks[FuncletPad].push_back(NewBlock);
+ // Treat the new block as incoming for load insertion.
+ IncomingBlock = NewBlock;
}
- // Walk the actions again and look for nested handlers. This has to
- // happen after all of the actions have been processed in the current
- // function.
- for (int I = 0, E = ActionList.size(); I < E; ++I)
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
- findActionRootLPads(*Handler);
- ActionList.clear();
+ Value *&Load = Loads[IncomingBlock];
+ // Insert the load into the predecessor block
+ if (!Load)
+ Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, IncomingBlock->getTerminator());
+
+ U.set(Load);
+ } else {
+ // Reload right before the old use.
+ auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, UsingInst);
+ U.set(Load);
}
}
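The per-block caching of reloads above guards against creating several loads when multiple edges from one predecessor (say, a switch with two cases) feed the same PHI; a compact sketch of that get-or-create step, with a hypothetical helper name:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: return the single reload of Slot for IncomingBlock,
// creating it just before the terminator the first time it is requested.
static Value *getOrCreateReload(AllocaInst *Slot, BasicBlock *IncomingBlock,
                                DenseMap<BasicBlock *, Value *> &Loads,
                                StringRef Name) {
  Value *&Load = Loads[IncomingBlock];
  if (!Load)
    Load = new LoadInst(Slot, Twine(Name, ".reload"), /*Volatile=*/false,
                        IncomingBlock->getTerminator());
  return Load;
}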
-void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn,
- WinEHFuncInfo &FuncInfo) {
- // Return if it's already been done.
- if (!FuncInfo.LandingPadStateMap.empty())
- return;
-
- WinEHNumbering Num(FuncInfo);
- Num.findActionRootLPads(*ParentFn);
- // The VisitedHandlers list is used by both findActionRootLPads and
- // calculateStateNumbers, but both functions need to visit all handlers.
- Num.VisitedHandlers.clear();
- Num.calculateStateNumbers(*ParentFn);
- // Pop everything on the handler stack.
- // It may be necessary to call this more than once because a handler can
- // be pushed on the stack as a result of clearing the stack.
- while (!Num.HandlerStack.empty())
- Num.processCallSite(None, ImmutableCallSite());
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+ MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd) {
+ assert(InvokeStateMap.count(II) &&
+ "should get invoke with precomputed state");
+ LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
+
+WinEHFuncInfo::WinEHFuncInfo() {}