Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 943
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h | 184
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.h | 85
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 349
-rw-r--r--  contrib/llvm/lib/CodeGen/AntiDepBreaker.h | 71
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 136
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2170
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 196
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 553
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 368
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h | 392
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 264
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 283
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 156
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 1711
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 383
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2570
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 649
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp | 736
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 234
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 120
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 166
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp | 114
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 466
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 1725
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 123
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 202
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 180
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 77
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 658
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 225
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 190
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 183
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 801
-rw-r--r--  contrib/llvm/lib/CodeGen/EdgeBundles.cpp | 97
-rw-r--r--  contrib/llvm/lib/CodeGen/ErlangGC.cpp | 81
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 725
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp | 74
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 225
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadata.cpp | 178
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/GCStrategy.cpp | 430
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 1583
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 1295
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 231
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.h | 228
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 565
-rw-r--r--  contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 296
-rw-r--r--  contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 152
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 335
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 995
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 70
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 951
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 1172
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 204
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 380
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 242
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 387
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 154
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp | 86
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 826
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 356
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 1181
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 61
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 1165
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 126
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 661
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 334
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp | 59
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 898
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 1867
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 330
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 1489
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 81
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 578
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePostDominators.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 360
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 364
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 2396
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 712
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 1290
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 1605
-rw-r--r--  contrib/llvm/lib/CodeGen/OcamlGC.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp | 193
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 644
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 61
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.h | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/Passes.cpp | 746
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 577
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 776
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 170
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 892
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.h | 173
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp | 132
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 145
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.h | 108
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 293
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 1117
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 1791
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 638
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp | 146
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 2193
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.h | 120
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 793
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 443
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 642
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 1322
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 100
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 248
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10214
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 1507
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 483
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 982
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 146
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3924
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 1451
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 3042
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1145
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 750
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 525
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 770
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2795
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 655
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 114
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 799
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 3039
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 914
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 185
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 278
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6382
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6873
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 559
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 645
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3019
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 299
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2593
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGC.cpp | 452
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp | 1152
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 502
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 250
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 381
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.h | 156
-rw-r--r--  contrib/llvm/lib/CodeGen/Spiller.cpp | 194
-rw-r--r--  contrib/llvm/lib/CodeGen/Spiller.h | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 1432
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 469
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 802
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 370
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 439
-rw-r--r--  contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp | 825
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 970
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 739
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 1305
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 784
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 285
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 309
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 1678
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 215
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 359
172 files changed, 128993 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 0000000..c50f8b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,943 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
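The hazard this pass removes is the classic write-after-read (WAR) dependence: a later redefinition of a register must not be moved above an earlier read of it, even though no value flows between the two. A schematic illustration of what renaming buys (hypothetical registers, not tied to any target):

    // Before renaming:                After renaming the second def of R1:
    //   R1 = R0 + 1                     R1 = R0 + 1
    //   store R1, [a]                   store R1, [a]
    //   R1 = R0 + 2   <- WAR on R1      R2 = R0 + 2   <- anti-dependence gone
    //   store R1, [b]                   store R2, [b]

With the rename, the scheduler is free to interleave the two independent computation chains.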
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
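Taken together, GetGroup, UnionGroups, and LeaveGroup implement a small union-find (disjoint-set) forest, deliberately without path compression or union by rank. A minimal self-contained sketch of the same scheme, for orientation only (the names and the structure are mine, simplified from the class above):

    #include <vector>

    // Illustrative disjoint-set mirroring AggressiveAntiDepState's group
    // logic: Node[R] maps register R to a group node, Parent[] is the forest.
    struct RegGroups {
      std::vector<unsigned> Parent, Node;
      explicit RegGroups(unsigned NumRegs) : Parent(NumRegs), Node(NumRegs) {
        for (unsigned i = 0; i != NumRegs; ++i)
          Parent[i] = Node[i] = i;               // every register starts alone
      }
      unsigned find(unsigned Reg) {              // cf. GetGroup
        unsigned N = Node[Reg];
        while (Parent[N] != N)                   // chase links; no compression
          N = Parent[N];
        return N;
      }
      unsigned unite(unsigned R1, unsigned R2) { // cf. UnionGroups
        unsigned G1 = find(R1), G2 = find(R2);
        unsigned Root = (G1 == 0) ? G1 : G2;     // group 0 must stay a root
        Parent[Root == G1 ? G2 : G1] = Root;
        return Root;
      }
      unsigned leave(unsigned Reg) {             // cf. LeaveGroup
        unsigned Idx = Parent.size();            // fresh singleton node; the
        Parent.push_back(Idx);                   // old node survives because
        Node[Reg] = Idx;                         // other nodes may point at it
        return Idx;
      }
    };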
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ State(NULL) {
+  /* Collect a bitset of all registers whose anti-dependencies are only
+     broken if they are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is current live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PassthruRegs.insert(*SubRegs);
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) == 0) {
+ Edges.push_back(&*P);
+ RegSet.insert(Reg);
+ }
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
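A quick check of the selection rule, with invented numbers: given predecessors A with getDepth() 3 and edge latency 2, and B with getDepth() 4 and latency 0 on an SDep::Anti edge, the totals are 5 and 4, so A is taken; had both totaled 5, the tie-break would favor B's anti edge, keeping it visible as a candidate for breaking.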
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+    // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ DefIndices[*AI] = Count;
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+  // If MI's uses have a special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instructions is more complex. We are
+  // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+    // If it wasn't previously live but now it is, this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(dbgs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ if (RenameOrder.count(SuperRC) == 0)
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Regs's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+    // If we fall out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI->isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ const SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!MRI.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second.Operand->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 0000000..6683630
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a liverange
+ typedef struct {
+      /// Operand - The register's operand
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+    /// composed of registers that are not eligible for renaming.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+    // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+    // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the
+    /// critical path of the ScheduleDAG and break them by renaming
+    /// registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+                       const char *header = NULL, const char *footer = NULL);
+
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
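To see how this interface is meant to be driven, here is a hedged sketch of the per-block call protocol; the driver function is hypothetical (the real client in this tree is the post-RA list scheduler in PostRASchedulerList.cpp), and all region bookkeeping is elided:

    // Hypothetical driver showing only the AntiDepBreaker call protocol.
    void runAntiDepOverFunction(MachineFunction &MF, AntiDepBreaker &ADB) {
      for (MachineFunction::iterator MBB = MF.begin(), MBBE = MF.end();
           MBB != MBBE; ++MBB) {
        ADB.StartBlock(MBB);     // seed liveness from successors' live-ins
        // For each scheduling region in the block, after scheduling it:
        //   unsigned Broken = ADB.BreakAntiDependencies(SUnits, Begin, End,
        //                                               InsertPos, DbgValues);
        // For instructions that never enter a region:
        //   ADB.Observe(MI, Count, InsertPos);
        ADB.FinishBlock();       // discard the per-block liveness state
      }
    }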
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 0000000..3fa1f8f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo)
+ : Pos(0) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ rewind();
+
+ DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ "Target hint is outside allocation order.");
+#endif
+}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 0000000..aed461a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,85 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+
+class AllocationOrder {
+ SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ int Pos;
+
+public:
+ /// Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about reserved and allocatable registers.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo);
+
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
+ /// Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next() again after it returned 0, it will keep
+ /// returning 0 until rewind() is called.
+ unsigned next() {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ while (Pos < int(Order.size())) {
+ unsigned Reg = Order[Pos++];
+ if (!isHint(Reg))
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// As next(), but allow duplicates to be returned, and stop before the
+ /// Limit'th register in the RegisterClassInfo allocation order.
+ ///
+ /// This can produce more than Limit registers if there are hints.
+ unsigned nextWithDups(unsigned Limit) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (Pos < int(Limit))
+ return Order[Pos++];
+ return 0;
+ }
+
+ /// Start over from the beginning.
+ void rewind() { Pos = -int(Hints.size()); }
+
+ /// Return true if the last register returned from next() was a preferred register.
+ bool isHint() const { return Pos <= 0; }
+
+ /// Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const {
+ return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+ }
+};
+
+} // end namespace llvm
+
+#endif
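The negative-Pos trick above is compact but worth unpacking: rewind() sets Pos to -Hints.size(), so next() first serves the hints via Hints.end()[Pos] (indexing backward from the end of the hint vector), then walks Order while skipping any register that already appeared as a hint. A standalone toy model of the resulting visit sequence (illustrative only; the function name is mine):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy model of AllocationOrder::next(): hints first, then the class
    // allocation order with the hinted registers filtered out.
    static std::vector<unsigned> visitOrder(const std::vector<unsigned> &Hints,
                                            const std::vector<unsigned> &Order) {
      std::vector<unsigned> Out(Hints);
      for (unsigned i = 0, e = Order.size(); i != e; ++i)
        if (std::find(Hints.begin(), Hints.end(), Order[i]) == Hints.end())
          Out.push_back(Order[i]);
      return Out;
    }

    int main() {
      std::vector<unsigned> Hints(1, 3);         // one hint: register 3
      std::vector<unsigned> Order;               // class order: 1 2 3 4
      for (unsigned r = 1; r <= 4; ++r)
        Order.push_back(r);
      std::vector<unsigned> V = visitOrder(Hints, Order);
      for (unsigned i = 0; i != V.size(); ++i)   // prints: 3 1 2 4
        std::printf("%u ", V[i]);
      return 0;
    }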
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 0000000..dd7282c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,349 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
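+
+// Worked example (illustrative, not part of this file): in the aggregate
+// { i32, { i32, i32 }, i32 } the scalar members linearize to 0..3, so the
+// insertvalue/extractvalue index path {1, 1} denotes linear index 2.
+static unsigned exampleLinearIndex(llvm::LLVMContext &Ctx) {
+  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
+  llvm::StructType *Inner = llvm::StructType::get(I32, I32, NULL);
+  llvm::StructType *Outer = llvm::StructType::get(I32, Inner, I32, NULL);
+  unsigned Idx[] = { 1, 1 };
+  return llvm::ComputeLinearIndex(Outer, Idx, Idx + 2, 0); // Returns 2.
+}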
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
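+
+// Illustrative sketch: for { i32, [2 x float] } this fills ValueVTs with
+// { i32, f32, f32 } and Offsets with { 0, 4, 8 } under a typical layout.
+// TLI is assumed to be a fully initialized TargetLowering.
+static void exampleValueVTs(const llvm::TargetLowering &TLI, llvm::Type *Ty) {
+  llvm::SmallVector<llvm::EVT, 4> ValueVTs;
+  llvm::SmallVector<uint64_t, 4> Offsets;
+  llvm::ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+}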
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
+ }
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ }
+}
+
+
+/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
+/// through it (and any transitive noop operands to it) and return its input
+/// value. This is used to determine if a tail call can be formed.
+///
+static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
+ // If V is not an instruction, it can't be looked through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
+
+ Value *Op = I->getOperand(0);
+
+ // Look through truly no-op truncates.
+ if (isa<TruncInst>(I) &&
+ TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
+ return getNoopInput(I->getOperand(0), TLI);
+
+ // Look through truly no-op bitcasts.
+ if (isa<BitCastInst>(I)) {
+ // No type change at all.
+ if (Op->getType() == I->getType())
+ return getNoopInput(Op, TLI);
+
+ // Pointer to pointer cast.
+ if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
+ return getNoopInput(Op, TLI);
+
+ if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
+ TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
+ TLI.isTypeLegal(EVT::getEVT(I->getType())))
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through inttoptr.
+ if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through ptrtoint.
+ if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+
+ // Otherwise it's not something we can look through.
+ return V;
+}
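+
+// Example of what this looks through (illustrative IR, LP64 target):
+//   %p = call i8* @f()
+//   %q = bitcast i8* %p to i32*    ; pointer-to-pointer cast, a no-op
+//   %r = ptrtoint i32* %q to i64   ; pointer-sized, also a no-op
+// getNoopInput(%r) walks both casts and returns %p, the call itself.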
+
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+ const TargetLowering &TLI) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term)))
+ return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(I))
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(BBI))
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ const Function *F = ExitBB->getParent();
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ // We handle two cases: multiple return values + scalars.
+ Value *RetVal = Ret->getOperand(0);
+ if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
+ // Handle scalars first.
+ return getNoopInput(Ret->getOperand(0), TLI) == I;
+
+ // If this is an aggregate return, look through the insert/extract values and
+ // see if each is transparent.
+ for (unsigned i = 0, e = cast<StructType>(RetVal->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(RetVal, i);
+ if (InScalar == 0) return false;
+ InScalar = getNoopInput(InScalar, TLI);
+
+ // If the scalar value being inserted is an extractvalue of the right index
+ // from the call, then everything is good.
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
+ if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
+ EVI->getIndices()[0] != i)
+ return false;
+ }
+
+ return true;
+}
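+
+// Illustrative IR for the position this tests (LP64 target assumed):
+//   %x = tail call i8* @callee()
+//   %y = bitcast i8* %x to i32*   ; no-op, tolerated via getNoopInput()
+//   ret i32* %y
+// A store, load, or second call between the call and the ret would make
+// isInTailCallPosition() return false.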
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 0000000..df47f98
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,71 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
+ virtual ~AntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) = 0;
+
+ /// BreakAntiDependencies - Identify anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) = 0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() = 0;
+
+ /// UpdateDbgValue - Update a DBG_VALUE if the dependency breaker is
+ /// renaming other machine instructions to use NewReg.
+ void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+ assert(MI && MI->isDebugValue() && "MI is not a DBG_VALUE!");
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+ MI->getOperand(0).setReg(NewReg);
+ }
+};
+
+} // end namespace llvm
+
+#endif
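+
+// Minimal conforming subclass sketch (illustrative only; the real
+// implementations are CriticalAntiDepBreaker and AggressiveAntiDepBreaker):
+namespace {
+class NullAntiDepBreaker : public llvm::AntiDepBreaker {
+public:
+  void StartBlock(llvm::MachineBasicBlock *) {}
+  unsigned BreakAntiDependencies(const std::vector<llvm::SUnit> &,
+                                 llvm::MachineBasicBlock::iterator,
+                                 llvm::MachineBasicBlock::iterator,
+                                 unsigned, DbgValueVector &) { return 0; }
+  void Observe(llvm::MachineInstr *, unsigned, unsigned) {}
+  void FinishBlock() {}
+};
+} // end anonymous namespace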
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 0000000..188047d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,136 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+ cl::init(false));
+
+
+ARMException::ARMException(AsmPrinter *A)
+ : DwarfException(A) {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+ Asm->OutStreamer.EmitFnStart();
+ if (Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitCantUnwind();
+ else {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ if (EnableARMEHABIDescriptors) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (!MMI->getLandingPads().empty()) {
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
+ }
+ }
+
+ Asm->OutStreamer.EmitFnEnd();
+}
+
+void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
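+
+// Roughly the directive sequence this emits for a function that needs an
+// unwind table entry, with -arm-enable-ehabi-descriptors (illustrative):
+//   .fnstart
+// eh_func_begin0:
+//   ... function body ...
+// eh_func_end0:
+//   .personality __gxx_personality_v0   @ only if there are landing pads
+//   .handlerdata
+//   ... exception table ...
+//   .fnend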
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..d4a745d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,2170 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (P == 0)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round up to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
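+
+// Worked example (illustrative): with a preferred alignment of 8 bytes
+// (log2 = 3) and an explicit "align 16" on the global (log2 = 4), this
+// returns 4; with an explicit "align 4" (log2 = 2) on a global that has
+// a section, the explicit value wins and 2 is returned.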
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : MachineFunctionPass(ID),
+ TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer.getContext()),
+ OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = 0; DE = 0; MMI = 0; LI = 0;
+ CurrentFnSym = CurrentFnSymForSize = 0;
+ GCMetadataPrinters = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters != 0) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+ delete I->second;
+ delete &GCMap;
+ GCMetadataPrinters = 0;
+ }
+
+ delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+/// getDataLayout - Return information about data layout.
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return *TM.getDataLayout();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ OutStreamer.InitStreamer();
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getDataLayout());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(*this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer.AddComment("Start of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ OutStreamer.AddComment("End of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ DD = new DwarfDebug(this, &M);
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ return false;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ return false;
+ case ExceptionHandling::ARM:
+ DE = new ARMException(this);
+ return false;
+ case ExceptionHandling::Win64:
+ DE = new Win64Exception(this);
+ return false;
+ }
+
+ llvm_unreachable("Unknown exception type.");
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkOnceODRAutoHideLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->getLinkOnceDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const DataLayout *TD = TM.getDataLayout();
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
+ return;
+ }
+
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+ // cases (e.g. when Align == 1), the external assembler might enforce
+ // some unknown default alignment behavior, which could cause spurious
+ // differences between the external and integrated assemblers.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle thread-local data for Mach-O, which requires us to output an
+ // additional structure of data and to mangle the original symbol so that
+ // we can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize);
+ OutStreamer.EmitIntValue(0, PtrSize);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
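+
+// Illustrative ELF output for the two main paths above (directives vary
+// with the target's MCAsmInfo):
+//   .type  counter,@object
+//   .comm  counter,4,4
+// versus, for an initialized global:
+//   .globl table
+//   .align 4
+// table:
+//   .long  1
+//   .size  table, 4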
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer.AddComment("Address taken block that was later removed");
+ OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ }
+
+ // Add a workaround for linkonce linkage on Cygwin/MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+ // FIXME: What is this?
+ MCSymbol *FakeStub =
+ OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+ CurrentFnSym->getName());
+ OutStreamer.EmitLabel(FakeStub);
+ }
+
+ // Emit pre-function debug and/or EH information.
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
+ }
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
+ }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined())
+ return OutStreamer.EmitLabel(CurrentFnSym);
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+}
+
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// emitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+ AP.TM.getRegisterInfo()->getName(RegNo));
+ AP.OutStreamer.AddBlankLine();
+}
+
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str = "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ Str += ' ';
+ Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer.AddComment(Str);
+ AP.OutStreamer.AddBlankLine();
+}
+
+/// emitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+ // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ MF->getFunction()->needsUnwindTableEntry();
+}
+
+bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
+ return MAI->doesDwarfUseRelocationsAcrossSections();
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ if (MMI->getCompactUnwindEncoding() != 0)
+ OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
+
+ MachineModuleInfo &MMI = MF->getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool FoundOne = false;
+ (void)FoundOne;
+ for (std::vector<MachineMove>::iterator I = Moves.begin(),
+ E = Moves.end(); I != E; ++I) {
+ if (I->getLabel() == Label) {
+ EmitCFIFrameMove(*I);
+ FoundOne = true;
+ }
+ }
+ assert(FoundOne);
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ LastMI = II;
+
+ // Print the assembly for the instruction.
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginInstruction(II);
+ }
+
+ if (isVerbose())
+ emitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::PROLOG_LABEL:
+ emitPrologLabel(*II);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(II);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!emitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) emitImplicitDef(II, *this);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) emitKill(II, *this);
+ break;
+ default:
+ if (!TM.hasMCUseLoc())
+ MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
+ EmitInstruction(II);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endInstruction(II);
+ }
+ }
+ }
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
+
+ const Function *F = MF->getFunction();
+ for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
+ const BasicBlock *BB = i;
+ if (!BB->hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer.AddComment("Address of block that was removed by CodeGen");
+ OutStreamer.EmitLabel(Sym);
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function, so we can get the size as
+ // difference between the function label and the temp label.
+ MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(FnEndLabel);
+
+ const MCExpr *SizeExp =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ }
+
+ // Emit post-function debug information.
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
+ }
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
+ }
+ MMI->EndFunction();
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+ for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
+ ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ // FIXME: Get the bit range this register uses of the superregister
+ // so that we can produce a DW_OP_bit_piece
+ }
+
+ // FIXME: Handle cases like a super register being encoded as
+ // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
+
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of a DWARF expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more mature.
+
+ if (int Offset = MLoc.getOffset()) {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+ EmitInt8(dwarf::DW_OP_breg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_bregx");
+ EmitInt8(dwarf::DW_OP_bregx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ EmitSLEB128(Offset);
+ } else {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_regx");
+ EmitInt8(dwarf::DW_OP_regx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ }
+
+ // FIXME: Produce a DW_OP_bit_piece if we used a superregister
+}
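+
+// Encoding example (illustrative): DWARF register 5 at offset 8 emits
+// DW_OP_breg5 followed by SLEB128(8); register 40 with no offset exceeds
+// the compact opcode range, so DW_OP_regx followed by ULEB128(40) is
+// emitted instead.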
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+
+ // Finalize debug and EH information.
+ if (DE) {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndModule();
+ }
+ delete DE; DE = 0;
+ }
+ if (DD) {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endModule();
+ }
+ delete DD; DD = 0;
+ }
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = Mang->getSymbol(I);
+
+ const GlobalValue *GV = I->getAliasedGlobal();
+ MCSymbol *Target = Mang->getSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(*this);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ MMI = 0;
+
+ OutStreamer.Finish();
+ OutStreamer.reset();
+
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+ // SectionCPs - Keep track of the alignment and constant pool entries per section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries that go
+ // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1: Kind = SectionKind::getReadOnlyWithRelLocal(); break;
+ case 0:
+ switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+ // The number of sections is small, so just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitZeros(NewOffset - Offset);
+
+ Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
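+
+// Padding arithmetic example (illustrative): with Offset == 6 and an entry
+// alignment of 8, AlignMask == 7 and NewOffset == (6 + 7) & ~7 == 8, so
+// EmitZeros() emits two bytes of padding before the entry's label.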
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ // We should also do this if the section name is NULL or the function is
+ // declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name, just any randomly
+ // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
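+
+// Illustrative sketch (symbol names hypothetical): for a two-entry PIC jump
+// table on a target with .set support, the code above emits roughly:
+//   .set L0_0_set_3, LBB0_3-LJTI0_0
+//   .set L0_0_set_4, LBB0_4-LJTI0_0
+// LJTI0_0:
+//   .long L0_0_set_3
+//   .long L0_0_set_4
+// The exact spellings come from GetJTISymbol/GetJTSetSymbol and the target's
+// private-label prefix.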
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is the plain address of a block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
+ OutStreamer.EmitValue(Value, EntrySize);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // Otherwise, no need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ if (GV->getName() == "llvm.global_ctors") {
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
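+
+// For illustration, this fires on module-level appending globals such as
+// (names hypothetical):
+//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+//     [{ i32, void ()* } { i32 65535, void ()* @init }]
+// which EmitXXStructorList below lowers into the target's static constructor
+// section.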
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which shouldEmitUsedDirectiveFor
+/// returns true as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+typedef std::pair<unsigned, Constant*> Structor;
+
+static bool priority_order(const Structor& lhs, const Structor& rhs) {
+ return lhs.first < rhs.first;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority.
+ // Sanity check the structors list.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (!InitList) return; // Not an array!
+ StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+ if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ SmallVector<Structor, 8> Structors;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (!CS) continue; // Malformed.
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority) continue; // Malformed.
+ Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
+ CS->getOperand(1)));
+ }
+
+ // Emit the function pointers in the target-specific order.
+ const DataLayout *TD = TM.getDataLayout();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ const MCSection *OutputSection =
+ (isCtor ?
+ getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+ getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ OutStreamer.SwitchSection(OutputSection);
+ if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(Structors[i].second);
+ }
+}
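+
+// A minimal sketch of the resulting output, assuming an ELF-like target where
+// each priority selects its own section (section and symbol names
+// illustrative):
+//   .section .init_array.101,"aw"
+//   .p2align 3
+//   .quad early_init
+//   .section .init_array,"aw"
+//   .quad init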
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
+}
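+
+// For example, EmitLabelDifference(Lfunc_end0, Lfunc_begin0, 4) produces, on a
+// .set-capable assembler (symbol names illustrative):
+//   .set Lset0, Lfunc_end0-Lfunc_begin0
+//   .long Lset0
+// and otherwise the direct form ".long Lfunc_end0-Lfunc_begin0".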
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment), honoring the requested Size.
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
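+
+// For example, EmitAlignment(3) requests an 8-byte boundary: in a text section
+// this becomes a code alignment (padded with the target's nop fill), while in
+// data sections it pads with zeros, e.g. ".p2align 3" on targets whose align
+// directive takes a power of two.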
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (C != CE)
+ return lowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
+
+ const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return lowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return lowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
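+
+// As an illustration of the GetElementPtr case above (names hypothetical), the
+// constant:
+//   i32* getelementptr ([4 x i32]* @arr, i32 0, i32 2)
+// folds to the MCExpr "arr+8" (two 4-byte elements past the base), which can
+// then appear directly in a static initializer, e.g. ".long arr+8".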
+
+static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64) return -1;
+
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
+ uint64_t Value = CI->getZExtValue();
+
+ // Make sure the constant is at least 8 bits long and has a power
+ // of 2 bit width. This guarantees the constant bit width is
+ // always a multiple of 8 bits, avoiding issues with padding out
+ // to Size and other such corner cases.
+ if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
+
+ uint8_t Byte = static_cast<uint8_t>(Value);
+
+ for (unsigned i = 1; i < Size; ++i) {
+ Value >>= 8;
+ if (static_cast<uint8_t>(Value) != Byte) return -1;
+ }
+ return Byte;
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
+ int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
+ if (Byte == -1) return -1;
+
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
+ if (ThisByte == -1) return -1;
+ if (Byte != ThisByte) return -1;
+ }
+ return Byte;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
+
+ return -1;
+}
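+
+// For example, the i32 constant 0x01010101 reports the repeated byte 0x01, and
+// an [8 x i8] array of all 0x2A bytes reports 0x2A, so callers can emit one
+// .fill instead of per-element data; 0x01020304 is not repeated and yields -1.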
+
+static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+ unsigned AddrSpace,AsmPrinter &AP){
+
+ // See if we can aggregate this into a .fill; if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, AP.TM);
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize, AddrSpace);
+ }
+ } else if (ElementByteSize == 4) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ assert(CDS->getElementType()->isFloatTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+
+ F = CDS->getElementAsFloat(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
+ }
+ } else {
+ assert(CDS->getElementType()->isDoubleTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+
+ F = CDS->getElementAsDouble(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
+ }
+ }
+
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ unsigned Size = TD.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
+
+static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
+}
+
+static void emitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
+
+static void emitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const DataLayout *TD = AP.TM.getDataLayout();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ emitGlobalConstantImpl(Field, AddrSpace, AP);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
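+
+// Illustrative example: for the constant { i8 1, i32 2 } with a 4-byte-aligned
+// i32, the loop emits the i8 followed by PadSize = 3 zero bytes so the i32
+// lands at offset 4:
+//   .byte 1
+//   .zero 3
+//   .long 2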
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
+
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
+ }
+
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
+
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
+
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
+
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+ } else {
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+}
+
+static void emitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const DataLayout *TD = AP.TM.getDataLayout();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
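+
+// For example, an i128 constant with 64-bit halves Hi and Lo is emitted as two
+// 8-byte chunks: Lo then Hi on little-endian targets and Hi then Lo on
+// big-endian ones, since APInt stores the least-significant word first.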
+
+static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ const DataLayout *TD = AP.TM.getDataLayout();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ emitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ return;
+ }
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return emitGlobalConstantFP(CFP, AddrSpace, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return emitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return emitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return emitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return emitGlobalConstantVector(V, AddrSpace, AP);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // through the streamer with EmitValue.
+ AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ if (Size)
+ emitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+ Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end(); CL != E; ++CL) {
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out which block is the
+ // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
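+
+// Sample verbose-assembly comments this produces (block numbers illustrative):
+// =>This Inner Loop Header: Depth=1
+//     Child Loop BB0_3 Depth 2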
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Align);
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (isVerbose())
+ OutStreamer.AddComment("Block address taken");
+
+ std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+ for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+ OutStreamer.EmitLabel(Syms[i]);
+ }
+
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ emitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+ Twine(MBB->getNumber()) + ":");
+ }
+ } else {
+ OutStreamer.EmitLabel(MBB->getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.isBranch() || MI.isIndirectBranch())
+ return false;
+
+ // If MBB is referenced by any operand of one of the branches, this is
+ // not a fall through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(S);
+ if (GCPI != GCMap.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMap.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 0000000..156acac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,196 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ OutStreamer.EmitSLEB128IntValue(Value);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ OutStreamer.EmitULEB128IntValue(Value, PadTo);
+}
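+
+// For example, EmitULEB128(624485) emits the DWARF spec's classic byte
+// sequence 0xE5 0x8E 0x26: seven value bits per byte, least-significant group
+// first, high bit set on all but the last byte.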
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+ if (isVerbose()) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ OutStreamer.EmitIntValue(Val, 1);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc != 0)
+ OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer.EmitIntValue(Val, 1);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ default: llvm_unreachable("Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
+ case dwarf::DW_EH_PE_udata2: return 2;
+ case dwarf::DW_EH_PE_udata4: return 4;
+ case dwarf::DW_EH_PE_udata8: return 8;
+ }
+}
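+
+// For example, DW_EH_PE_udata4 and DW_EH_PE_pcrel | DW_EH_PE_sdata4 both
+// occupy 4 bytes, since only the low three bits select the data format, while
+// DW_EH_PE_absptr is pointer-sized.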
+
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section. This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+ const MCSymbol *SectionLabel) const {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (MAI->getDwarfSectionOffsetDirective()) {
+ OutStreamer.EmitCOFFSecRel32(Label);
+ return;
+ }
+
+ // Get the section that we're referring to, based on SectionLabel.
+ const MCSection &Section = SectionLabel->getSection();
+
+ // If Label has already been emitted, verify that it is in the same section
+ // as the section label, for sanity.
+ assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+ "Section offset using wrong section base for label");
+
+ // If the section in question will end up with an address of 0 anyway, we can
+ // just emit an absolute reference to save a relocation.
+ if (Section.isBaseAddressKnownZero()) {
+ OutStreamer.EmitSymbolValue(Label, 4);
+ return;
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitCFIFrameMove - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ // Reg + Offset
+ OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
+ Src.getOffset());
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+}
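+
+// A sketch of the mapping (register names illustrative): a move describing
+// "CFA = SP + 16" becomes ".cfi_def_cfa sp, 16", a pure CFA-offset change
+// becomes ".cfi_def_cfa_offset 16", and a callee-saved register spilled at
+// CFA-8 becomes ".cfi_offset reg, -8".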
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 0000000..abfa330
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,553 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+ InlineAsm::AsmDialect Dialect) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer is actually a .s file, just emit the blob textually.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText(Str);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for srcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ MemoryBuffer *Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+
+ // FIXME: It would be nice if we could avoid creating a new instance of
+ // MCSubtargetInfo here, given that TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives that can change subtarget
+ // state, e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<MCTargetAsmParser>
+ TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setAssemblerDialect(Dialect);
+ Parser->setTargetParser(*TAP.get());
+
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
+
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
+
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$':
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ }
+ if (Done) break;
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have location metadata attached to the end of the instruction,
+ // and should never see metadata at any other point while processing;
+ // it's an error if we do.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ break;
+ }
+ }
+ }
+ OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
+}
+
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ int AsmPrinterVariant, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ OS << '\t';
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ AP->PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have a location metadata attached to the end of the
+ // instruction, but should see no metadata at any other point while
+ // processing. It's an error if we do.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ // FIXME: What if the operand isn't an MBB, report error?
+ OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+ else {
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : 0,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : 0, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+}
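+
+// For illustration, assuming an x86 AT&T target where operand $0 has been
+// allocated to %eax: the asm string "mov $0, $$42" comes out as
+// "\tmov %eax, $42", since "$0" is routed through PrintAsmOperand and
+// "$$" collapses to a single literal '$'.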
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "EmitInlineAsm only works on inline asms");
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful for seeing where empty asms wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsmVariant == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+ AP, LocCookie, OS);
+ else
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
+ EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+}
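+
+// The cookie decoded above lets diagnostics land on the right source line:
+// clang, for example, attaches !srcloc metadata to inline-asm call sites,
+// so an "invalid operand" error can point back at the original statement.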
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ OS << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new LastFn instruction, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
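+
+// For example, on a target whose private-global prefix is ".L", the asm
+// string "jne ${:private}done${:uid}" could print as "jne .Ldone7", the
+// "7" coming from the per-instruction counter maintained above.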
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'n': // Negate the immediate constant.
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << -MO.getImm();
+ return false;
+ }
+ }
+ return true;
+}
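+
+// A sketch of the default modifiers: if operand 2 of an INLINEASM is the
+// immediate 5, "${2:c}" prints "5" (the raw value with no immediate-syntax
+// decoration) and "${2:n}" prints "-5".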
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
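+
+// Targets that support inline-asm memory operands override the hook above;
+// a hypothetical x86 override might print "(%rax)" for a register-indirect
+// operand rather than returning true (failure).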
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 0000000..57e0acd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,368 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const {
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+ } else {
+ O << "Size: " << Size << "\n";
+ }
+
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+void DIEValue::anchor() { }
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present:
+ // Emit something to keep the lines and comments in sync.
+ // FIXME: Is there a better way to do this?
+ if (Asm->OutStreamer.hasRawTextSupport())
+ Asm->OutStreamer.EmitRawText(StringRef(""));
+ return;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr:
+ Size = Asm->getDataLayout().getPointerSize(); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present: return 0;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+}
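+
+// Worked example: each ULEB128 byte carries 7 value bits, so
+// getULEB128Size(127) == 1 while getULEB128Size(128) == 2. DW_FORM_udata
+// attributes therefore grow and shrink with the value they hold, unlike
+// the fixed-width DW_FORM_dataN forms above.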
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getDataLayout().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIELabel::print(raw_ostream &O) {
+ O << "Lbl: " << Label->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getDataLayout().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - Calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+ if (!Size) {
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block");
+ }
+}
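+
+// For instance, a 5-byte location expression stored as DW_FORM_block1
+// occupies 6 bytes on disk: a one-byte length header followed by the five
+// bytes of expression data.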
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 0000000..c332aa2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,392 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MCSymbol;
+ class raw_ostream;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+ /// Dwarf abbreviation.
+ class DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ uint16_t Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ uint16_t Form;
+ public:
+ DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ uint16_t Tag;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ uint16_t ChildrenFlag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 12> Data;
+
+ public:
+ DIEAbbrev(uint16_t T, uint16_t C)
+ : Tag(T), ChildrenFlag(C), Number(0), Data() {}
+
+ // Accessors.
+ uint16_t getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ uint16_t getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
+ void setTag(uint16_t T) { Tag = T; }
+ void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(uint16_t Attribute, uint16_t Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+ /// describes its organization.
+ class DIEValue;
+
+ class DIE {
+ protected:
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ DIE *Parent;
+
+ /// Attribute values.
+ ///
+ SmallVector<DIEValue*, 12> Values;
+
+ // Private data for print()
+ mutable unsigned IndentCount;
+ public:
+ explicit DIE(unsigned Tag)
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
+ IndentCount(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
+ DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// addChild - Add a child to the DIE.
+ ///
+ void addChild(DIE *Child) {
+ if (Child->getParent()) {
+ assert (Child->getParent() == this && "Unexpected DIE Parent!");
+ return;
+ }
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ Child->Parent = this;
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class DIEValue {
+ virtual void anchor();
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
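+ // For example, BestForm(false, 300) yields DW_FORM_data2, since 300
+ // fits in an unsigned short but not in an unsigned char, and
+ // BestForm(true, -200) likewise yields DW_FORM_data2.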
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ uint64_t getValue() const { return Integer; }
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIELabel - A label expression DIE.
+ //
+ class DIELabel : public DIEValue {
+ const MCSymbol *Label;
+ public:
+ explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// getValue - Get MCSymbol.
+ ///
+ const MCSymbol *getValue() const { return Label; }
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class DIEDelta : public DIEValue {
+ const MCSymbol *LabelHi;
+ const MCSymbol *LabelLo;
+ public:
+ DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (i.e. types).
+ class DIEEntry : public DIEValue {
+ DIE *const Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - Calculate the size of the block.
+ ///
+ unsigned ComputeSize(AsmPrinter *AP);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 0000000..f58ec9b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,264 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DIE.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+ llvm_unreachable("invalid AtomType!");
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
+ Header(8 + (atomList.size() * 4)),
+ HeaderData(atomList),
+ Entries(Allocator) { }
+
+DwarfAccelTable::~DwarfAccelTable() { }
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+ assert(Data.empty() && "Already finalized!");
+ // If the string is in the list already then add this die to the list
+ // otherwise add a new one.
+ DataArray &DIEs = Entries[Name];
+ DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket count; there is always at least 1 bucket.
+ if (num > 1024) Header.bucket_count = num/4;
+ else if (num > 16) Header.bucket_count = num/2;
+ else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
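+
+// With the thresholds above, 10 unique hashes get 10 buckets, 100 get 50,
+// and 5000 get 1250, keeping the average chain short without wasting space
+// on near-empty tables.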
+
+// compareDIEs - comparison predicate that sorts DIEs by their offset.
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
+ const DwarfAccelTable::HashDataContents *B) {
+ return A->Die->getOffset() < B->Die->getOffset();
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+
+ // Unique the entries.
+ std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
+ EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+ EI->second.end());
+
+ HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them when emitting the
+ // offsets; the symbols themselves are emitted along with the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the dies and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getStringPoolSym());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.size());
+ for (ArrayRef<HashDataContents*>::const_iterator
+ DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ DI != DE; ++DI) {
+ // Emit the DIE offset
+ Asm->EmitInt32((*DI)->Die->getOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16((*DI)->Die->getTag());
+ Asm->EmitInt8((*DI)->Flags);
+ }
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfUnits *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 0000000..9915bca
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,283 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "DIE.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+#include <vector>
+
+// The dwarf accelerator tables are an indirect hash table optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
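+//
+// A sketch of that reader-side lookup, in pseudocode:
+//
+//   hash = DJB(name);
+//   bucket = hash % bucket_count;
+//   idx = buckets[bucket];  // index into the hashes array; an empty
+//                           // bucket stores UINT32_MAX instead
+//   while (idx < hashes_count && hashes[idx] % bucket_count == bucket) {
+//     if (hashes[idx] == hash)
+//       compare the string at offsets[idx] against name;
+//     ++idx;
+//   }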
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfUnits;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
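+ // For example, HashDJB("a") == 5381 * 33 + 'a' == 177670 (0x2b606);
+ // this is the classic Bernstein string hash.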
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation-specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is as a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ enum TypeFlags {
+ eTypeFlagClassMask = 0x0000000fu,
+
+ // Always set for C++, only set for ObjC if this is the
+ // @implementation for a class.
+ eTypeFlagClassIsImplementation = ( 1u << 1 )
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ private:
+ struct TableHeaderData {
+ uint32_t die_offset_base;
+ SmallVector<Atom, 1> Atoms;
+
+ TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
+ : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(DIE *D, char Flags) :
+ Die(D),
+ Flags(Flags) { }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+ #endif
+ };
+private:
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ ArrayRef<HashDataContents*> Data; // offsets
+ HashData(StringRef S, ArrayRef<HashDataContents*> Data)
+ : Str(S), Data(Data) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < Data.size(); i++) {
+ O << " Offset: " << Data[i]->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
+ O << " Flags: " << Data[i]->Flags << "\n";
+ }
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+ void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfUnits *D);
+
+ // Allocator for HashData and HashDataContents.
+ BumpPtrAllocator Allocator;
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<HashDataContents*> DataArray;
+ typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+ public:
+ DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
+ ~DwarfAccelTable();
+ void AddName(StringRef, DIE*, char = 0);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 0000000..fec5ced
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,156 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer.EmitCFISections(false, true);
+
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ return;
+
+ // Emit references to all used personality functions
+ bool AtLeastOne = false;
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ if (!Personalities[i])
+ continue;
+ MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ AtLeastOne = true;
+ }
+
+ if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+ // This is a temporary hack to keep sections in the same order they
+ // were before. This lets us produce bit identical outputs while
+ // transitioning to CFI.
+ Asm->OutStreamer.SwitchSection(
+ const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+ Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+ Asm->OutStreamer.EmitDebugLabel(
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()));
+
+ // Provide LSDA information.
+ if (!shouldEmitLSDA)
+ return;
+
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ if (!shouldEmitPersonality)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ EmitExceptionTable();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 0000000..f9b6f94
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1711 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW, DwarfUnits *DWU)
+ : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(0), DebugInfoOffset(0) {
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+ return Value;
+}
+
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+ switch (Language) {
+ default:
+ break;
+
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 4.
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ case dwarf::DW_LANG_D:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
+/// addFlag - Add a flag that is true.
+void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
+ if (!DD->useDarwinGDBCompat())
+ Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
+ DIEIntegerOne);
+ else
+ addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of an immediate string so that DIEs
+/// have more predictable sizes. For split dwarf we instead emit an index
+/// into a separate table which gives the offset into the string table.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+ if (!DD->useSplitDwarf()) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+ } else {
+ unsigned idx = DU->getStringPoolIndex(String);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value);
+ }
+}
+
+/// addLocalString - Add a string attribute data and value. This is guaranteed
+/// to be in the local string pool instead of indirected.
+void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
+ StringRef String) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+ MCSymbol *Label) {
+ if (!DD->useSplitDwarf()) {
+ if (Label != NULL) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ } else {
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ }
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addOpAddress - Add a dwarf op address data and value using either
+/// DW_OP_addr or DW_OP_GNU_addr_index, depending on whether split dwarf is
+/// in use.
+///
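+/// For example, a location for a hypothetical symbol "foo" is emitted as the
+/// expression "DW_OP_addr foo" normally, or as
+/// "DW_OP_GNU_addr_index <pool index of foo>" under split dwarf.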
+void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) {
+ if (!DD->useSplitDwarf()) {
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Sym);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+ DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+ // Verify variable.
+ if (!V.Verify())
+ return;
+
+ unsigned Line = V.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+ // Verify global variable.
+ if (!G.Verify())
+ return;
+
+ unsigned Line = G.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+ // Verify subprogram.
+ if (!SP.Verify())
+ return;
+
+ // If the line number is 0, don't add it.
+ unsigned Line = SP.getLineNumber();
+ if (Line == 0)
+ return;
+
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ DIFile File = Ty.getFile();
+ unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
+ File.getDirectory(), getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+ // Verify namespace.
+ if (!NS.Verify())
+ return;
+
+ unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
+ StringRef FN = NS.getFilename();
+
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+ getUniqueID());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
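+/// DWARF register numbers below 32 get the compact one-byte
+/// DW_OP_reg0..DW_OP_reg31 encoding; larger numbers fall back to DW_OP_regx
+/// followed by a ULEB128 register operand.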
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = TRI->getDwarfRegNum(Reg, false);
+ if (Reg == TRI->getFrameRegister(*Asm->MF))
+ // If the variable offset is based on the frame register, use DW_OP_fbreg.
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location. Add the DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ for (; i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Location.isReg())
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
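+
+/* A short worked example, with hypothetical offsets: if the forwarding
+ field lives at byte offset 8 and the field VarName at byte offset 24, and
+ we start with a pointer to the struct, the ops appended below are:
+
+ DW_OP_deref (follow the pointer to the struct)
+ DW_OP_plus_uconst 8 (advance to the forwarding field)
+ DW_OP_deref (follow forwarding to the real struct)
+ DW_OP_plus_uconst 24 (advance to VarName itself)
+*/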
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read large comment just above here.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIType Ty = DV->getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV->getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy);
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset =
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+ unsigned varFieldOffset =
+ DIDerivedType(varField).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// isTypeSigned - Return true if the type is signed.
+static bool isTypeSigned(DIType Ty, int *SizeInBits) {
+ if (Ty.isDerivedType())
+ return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits);
+ if (Ty.isBasicType())
+ if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed
+ || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
+ *SizeInBits = Ty.getSizeInBits();
+ return true;
+ }
+ return false;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert(MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ int SizeInBits = -1;
+ bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
+ unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata;
+ switch (SizeInBits) {
+ case 8: Form = dwarf::DW_FORM_data1; break;
+ case 16: Form = dwarf::DW_FORM_data2; break;
+ case 32: Form = dwarf::DW_FORM_data4; break;
+ case 64: Form = dwarf::DW_FORM_data8; break;
+ default: break;
+ }
+ if (SignedConstant)
+ addSInt(Block, 0, Form, MO.getImm());
+ else
+ addUInt(Block, 0, Form, MO.getImm());
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+ return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+ bool Unsigned) {
+ return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+ bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
+ if (CIBitWidth <= 64) {
+ unsigned form = 0;
+ switch (CIBitWidth) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default:
+ form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ }
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const uint64_t *Ptr64 = Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+
+ // Output the constant to DWARF one byte at a time.
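+ // APInt stores its value as uint64_t words with the least significant word
+ // first, so word i/8 and byte (i & 7) select the i-th least significant
+ // byte; the big-endian case walks the bytes in reverse.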
+ for (int i = 0; i < NumBytes; i++) {
+ uint8_t c;
+ if (LittleEndian)
+ c = Ptr64[i / 8] >> (8 * (i & 7));
+ else
+ c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, c);
+ }
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addTemplateParams - Add template parameters in buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+ // Add template parameters.
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+}
+
+/// getOrCreateContextDIE - Get context owner's DIE.
+DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+ if (Context.isType())
+ return getOrCreateTypeDIE(DIType(Context));
+ else if (Context.isNameSpace())
+ return getOrCreateNameSpace(DINameSpace(Context));
+ else if (Context.isSubprogram())
+ return getOrCreateSubprogramDIE(DISubprogram(Context));
+ else
+ return getDIE(Context);
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (DIE *ContextDIE = getOrCreateContextDIE(Context))
+ ContextDIE->addChild(Die);
+ else
+ addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ DIType Ty(TyNode);
+ if (!Ty.Verify())
+ return NULL;
+ DIE *TyDIE = getDIE(Ty);
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
+ insertDIE(Ty, TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ }
+ // If this is a named finished type then include it in the list of types
+ // for the accelerator tables.
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ bool IsImplementation = false;
+ if (Ty.isCompositeType()) {
+ DICompositeType CT(Ty);
+ // A runtime language of 0 actually means C/C++, and any nonzero value
+ // is some version of Objective-C/C++.
+ IsImplementation = (CT.getRunTimeLang() == 0) ||
+ CT.isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ?
+ DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
+ if (!Ty.Verify())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = getDIEEntry(Ty);
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ // Set up proxy.
+ Entry = createDIEEntry(Buffer);
+ insertDIEEntry(Ty, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+
+ // If this is a complete composite type then include it in the
+ // list of global types.
+ addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+ DIDescriptor Context = Ty.getContext();
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
+ || Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty))
+ GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
+ continue;
+ addGlobalType(ATy);
+ }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
+ Buffer.setTag(dwarf::DW_TAG_unspecified_type);
+ // An unspecified type has only a name, nothing else.
+ return;
+ }
+
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ // Add size if non-zero (derived types might be zero-sized).
+ if (Size && Tag != dwarf::DW_TAG_pointer_type)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DTy.getClassType()));
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIDescriptor Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+ Buffer.addChild(ElemDie);
+ }
+ }
+ DIType DTy = CTy.getTypeDerivedFrom();
+ if (DTy.Verify()) {
+ addType(&Buffer, DTy);
+ addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy));
+
+ bool isPrototyped = true;
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Ty = Elements.getElement(i);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ if (DIType(Ty).isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ Buffer.addChild(Arg);
+ }
+ }
+ // Add prototype flag if we're dealing with a C language and the
+ // function has been prototyped.
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(&Buffer, dwarf::DW_AT_prototyped);
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward-declared struct type may not have its elements available.
+ unsigned N = Elements.getNumElements();
+ if (N == 0)
+ break;
+
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
+ ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addFlag(ElemDie, dwarf::DW_AT_explicit);
+ } else if (Element.isDerivedType()) {
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else if (DDTy.isStaticMember())
+ ElemDie = createStaticMemberDIE(DDTy);
+ else
+ ElemDie = createMemberDIE(DDTy);
+ } else if (Element.isObjCProperty()) {
+ DIObjCProperty Property(Element);
+ ElemDie = new DIE(Property.getTag());
+ StringRef PropertyName = Property.getObjCPropertyName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (Property.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (Property.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (Property.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (Property.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (Property.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (Property.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+
+ DIEEntry *Entry = getDIEEntry(Element);
+ if (!Entry) {
+ Entry = createDIEEntry(ElemDie);
+ insertDIEEntry(Element, Entry);
+ }
+ } else
+ continue;
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (DIDescriptor(ContainingType).isCompositeType())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
+
+ if (CTy.isObjcClassComplete())
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+
+ // Add template parameters to a class, structure or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy.getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized).
+ // TODO: Do we care about size for enum forward declarations?
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else if (!CTy.isForwardDecl())
+ // Add zero size if it is not a forward declaration.
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy.isForwardDecl())
+ addFlag(&Buffer, dwarf::DW_AT_declaration);
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty()) {
+ addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addAccelNamespace(NS.getName(), NDie);
+ } else
+ addAccelNamespace("(anonymous namespace)", NDie);
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// getRealLinkageName - If the linkage name begins with the special LLVM
+/// prefix ('\1') that tells the asm printer not to emit the usual symbol
+/// prefix, return the linkage name with that prefix stripped; for example,
+/// "\1_Z3foov" becomes "_Z3foov".
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// getOrCreateSubprogramDIE - Create new DIE using SP.
+DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+ DIE *SPDie = getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIE *DeclDie = NULL;
+ if (SPDecl.isSubprogram()) {
+ DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ }
+
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ // Unfortunately this code needs to stay here instead of below the
+ // AT_specification code in order to work around a bug in older
+ // gdbs that requires the linkage name to resolve multiple template
+ // functions.
+ // TODO: Remove this set of code when we get rid of the old gdb
+ // compatibility.
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty() && DD->useDarwinGDBCompat())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+
+ // If this DIE is going to refer declaration info using AT_specification
+ // then there is no need to add other attributes.
+ if (DeclDie) {
+ // Refer function declaration directly.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ DeclDie);
+
+ return SPDie;
+ }
+
+ // Add the linkage name if we have one.
+ if (!LinkageName.empty() && !DD->useDarwinGDBCompat())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP.getName());
+
+ addSourceLine(SPDie, SP);
+
+ // Add the prototyped flag if we have a prototype and the language is
+ // C-like.
+ if (SP.isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(SPDie, dwarf::DW_AT_prototyped);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
+ DIEBlock *Block = getDIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments. Arguments of a subprogram definition are not added
+ // here; they are handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ addFlag(SPDie, dwarf::DW_AT_artificial);
+
+ if (!SP.isLocalToUnit())
+ addFlag(SPDie, dwarf::DW_AT_external);
+
+ if (SP.isOptimized())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
+/// createGlobalVariableDIE - create global variable DIE.
+void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
+ // Check for pre-existence.
+ if (getDIE(N))
+ return;
+
+ DIGlobalVariable GV(N);
+ if (!GV.Verify())
+ return;
+
+ DIDescriptor GVContext = GV.getContext();
+ DIType GTy = GV.getType();
+
+ // If this is a static data member definition, some attributes belong
+ // to the declaration DIE.
+ DIE *VariableDIE = NULL;
+ bool IsStaticMember = false;
+ DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+ if (SDMDecl.Verify()) {
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ // We need the declaration DIE that is in the static member's class.
+ // But that class might not exist in the DWARF yet.
+ // Creating the class will create the static member decl DIE.
+ getOrCreateContextDIE(SDMDecl.getContext());
+ VariableDIE = getDIE(SDMDecl);
+ assert(VariableDIE && "Static member decl has no context?");
+ IsStaticMember = true;
+ }
+
+ // If this is not a static data member definition, create the variable
+ // DIE and add the initial set of attributes to it.
+ if (!VariableDIE) {
+ VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name and type.
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addFlag(VariableDIE, dwarf::DW_AT_external);
+ addGlobalName(GV.getName(), VariableDIE);
+ }
+
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ addToContextOwner(VariableDIE, GVContext);
+ }
+
+ // Add location.
+ bool addToAccelTable = false;
+ DIE *VariableSpecDIE = NULL;
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+ if (isGlobalVariable) {
+ addToAccelTable = true;
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ // A static member's declaration is already flagged as such.
+ if (!SDMDecl.Verify())
+ addFlag(VariableDIE, dwarf::DW_AT_declaration);
+ addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ // Add linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty()) {
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(IsStaticMember && VariableSpecDIE ?
+ VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ // In compatibility mode with older gdbs we put the linkage name on both
+ // the TAG_variable DIE and on the TAG_member DIE.
+ if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ // AT_const_value was added when the static member was created. To avoid
+ // emitting AT_const_value multiple times, we only add AT_const_value when
+ // it is not a static member.
+ if (!IsStaticMember)
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ Value *Ptr = CE->getOperand(0);
+ addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ if (addToAccelTable) {
+ DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+ addAccelName(GV.getName(), AddrDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ addAccelName(GV.getLinkageName(), AddrDIE);
+ }
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+ DIE *IndexTy) {
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+
+ // The LowerBound value defines the lower bounds which is typically zero for
+ // C/C++. The Count value is the number of elements. Values are 64 bit. If
+ // Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+ // Count == 0, then the array has zero elements in which case we do not emit
+ // an upper bound.
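+ // For example, "int a[10]" in C has LowerBound == 0 (the language default,
+ // so DW_AT_lower_bound is omitted) and Count == 10, which yields
+ // DW_AT_upper_bound == 9.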
+ int64_t LowerBound = SR.getLo();
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+ int64_t Count = SR.getCount();
+
+ if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+ if (Count != -1 && Count != 0)
+ // FIXME: An unbounded array should reference the expression that defines
+ // the array.
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
+
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->isVector())
+ addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addString(IdxTy, dwarf::DW_AT_name, "int");
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// constructContainingTypeDIEs - Construct DIEs for types that contain
+/// vtables.
+void CompileUnit::constructContainingTypeDIEs() {
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = CI->second;
+ if (!N) continue;
+ DIE *NDie = getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
+ StringRef Name = DV->getName();
+
+ // Translate tag to proper Dwarf tag.
+ unsigned Tag = DV->getTag();
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ DbgVariable *AbsVar = DV->getAbstractVariable();
+ DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
+ if (AbsDIE)
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, DV->getVariable());
+ addType(VariableDie, DV->getType());
+ }
+
+ if (DV->isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+
+ if (isScopeAbstract) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLabel(VariableDie, dwarf::DW_AT_location,
+ dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV->getMInsn()) {
+ bool updated = false;
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand &RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ DV->getType().isUnsignedDIType());
+ } else {
+ addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+ // If VariableDie was not updated, the DBG_VALUE instruction does not
+ // carry valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ } else {
+ // ... else use the frame index.
+ int FI = DV->getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+ }
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // DW_AT_bit_offset counts from the high-order end of the containing
+ // field, so on little-endian targets recompute it from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
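+ // Hypothetical example: a 3-bit field at bit offset 0 of a 32-bit container
+ // (Size == 3, FieldSize == 32) gets DW_AT_bit_offset == 29 on a
+ // little-endian target, counting from the high-order end.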
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable:
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
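+ // In stack-machine terms: DW_OP_dup/DW_OP_deref load the vtable pointer
+ // (*ObAddr), DW_OP_constu/DW_OP_minus rewind it by Offset, the second
+ // DW_OP_deref fetches the stored base-class offset, and DW_OP_plus adds it
+ // to ObAddr.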
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ if (MDNode *PNode = DT.getObjCProperty())
+ if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
+
+ if (DT.isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+
+ return MemberDie;
+}
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+ if (!DT.Verify())
+ return NULL;
+
+ DIE *StaticMemberDIE = new DIE(DT.getTag());
+ DIType Ty = DT.getTypeDerivedFrom();
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ if (DT.isProtected())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ insertDIE(DT, StaticMemberDIE);
+ return StaticMemberDIE;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..2b180c6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,383 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class DwarfUnits;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information associated
+/// with a source file.
+class CompileUnit {
+ /// UniqueID - a numeric ID unique among all CUs in the module
+ ///
+ unsigned UniqueID;
+
+ /// Language - The DW_AT_language of the compile unit
+ ///
+ unsigned Language;
+
+  /// CUDie - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ // Holders for some common dwarf information.
+ DwarfDebug *DD;
+ DwarfUnits *DU;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// AccelNames - A map of names for the name accelerator table.
+ ///
+ StringMap<std::vector<DIE*> > AccelNames;
+ StringMap<std::vector<DIE*> > AccelObjC;
+ StringMap<std::vector<DIE*> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+  /// Offset of the CUDie from the beginning of the debug info section.
+ unsigned DebugInfoOffset;
+
+  /// getDefaultLowerBound - Return the default lower bound for an array. If
+  /// the DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
+public:
+ CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
+ DwarfUnits *);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getUniqueID() const { return UniqueID; }
+ unsigned getLanguage() const { return Language; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+ const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ return AccelNames;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ return AccelObjC;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ return AccelNamespace;
+ }
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+ &getAccelTypes() const {
+ return AccelTypes;
+ }
+
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobalName - Add a new global entity to the compile unit.
+ ///
+ void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// addAccelName - Add a new name to the name accelerator table.
+ void addAccelName(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelObjC(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelNamespace(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+ }
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+
+ /// addFlag - Add a flag that is true to the DIE.
+ void addFlag(DIE *Die, unsigned Attribute);
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, const StringRef Str);
+
+  /// addLocalString - Add a string attribute data and value; the string is
+  /// kept in the local string pool instead of being indirected.
+ void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+
+ /// addOpAddress - Add a dwarf op address data and value using the
+ /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addOpAddress(DIE *Die, MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+ void addSourceLine(DIE *Die, DIObjCProperty Ty);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const ConstantFP *CFP);
+
+ /// addTemplateParams - Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+ /// addRegisterOp - Add register operand.
+ void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+ /// addRegisterOffset - Add register offset.
+ void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a
+ /// DbgVariable based on provided MachineLocation.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity. This takes
+  /// an attribute parameter because DW_AT_friend attributes are also
+ /// type references.
+ void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateSubprogramDIE - Create new DIE using SP.
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(const MDNode *N);
+
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+ /// new DIE for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+  /// createGlobalVariableDIE - Create global variable DIE.
+ void createGlobalVariableDIE(const MDNode *N);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// constructContainingTypeDIEs - Construct DIEs for types that contain
+ /// vtables.
+ void constructContainingTypeDIEs();
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+ /// createStaticMemberDIE - Create new static data member DIE.
+ DIE *createStaticMemberDIE(DIDerivedType DT);
+
+private:
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+ DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..d3cb4f9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2570 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
+ cl::Hidden, cl::init(false),
+ cl::desc("Generate DWARF pubnames section"));
+
+namespace {
+ enum DefaultOnOff {
+ Default, Enable, Disable
+ };
+}
+
+static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
+ cl::desc("Compatibility with Darwin gdb."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
+ cl::desc("Output prototype dwarf split debug info."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+// Configuration values for initial hash set sizes (log2).
+//
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+DIType DbgVariable::getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ }
+ return Ty;
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize),
+ SourceIdMap(DIEValueAllocator),
+ PrevLabel(NULL), GlobalCUIndexCount(0),
+ InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
+ DIEValueAllocator),
+ SkeletonAbbrevSet(InitAbbreviationsSetSize),
+ SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
+ DIEValueAllocator) {
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+ DwarfStrSectionSym = TextSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+ DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
+
+ // Turn on accelerator tables and older gdb compatibility
+ // for Darwin.
+ bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+  if (DarwinGDBCompat == Default)
+    IsDarwinGDBCompat = IsDarwin;
+  else
+    IsDarwinGDBCompat = DarwinGDBCompat == Enable;
+
+  if (DwarfAccelTables == Default)
+    HasDwarfAccelTables = IsDarwin;
+  else
+    HasDwarfAccelTables = DwarfAccelTables == Enable;
+
+  if (SplitDwarf == Default)
+    HasSplitDwarf = false;
+  else
+    HasSplitDwarf = SplitDwarf == Enable;
+
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule();
+ }
+}
+
+DwarfDebug::~DwarfDebug() {
+}
+
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+MCSymbol *DwarfUnits::getStringPoolSym() {
+ return Asm->GetTempSymbol(StringPref);
+}
+
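+// A string pool entry pairs the MCSymbol for the string with its index;
+// both are created lazily the first time the string is requested.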
+MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.first;
+
+ Entry.second = NextStringPoolNumber++;
+ return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+}
+
+unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextStringPoolNumber++;
+ Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+ return Entry.second;
+}
+
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+ std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextAddrPoolNumber++;
+ Entry.first = Sym;
+ return Entry.second;
+}
+
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations->push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations->size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+// If the special LLVM prefix that is used to inform the asm printer not to
+// emit the usual symbol prefix before the symbol name is present, then
+// return the linkage name after skipping this special LLVM prefix.
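+// For example, a linkage name recorded as "\1_ZN3foo3barEv" is returned as
+// "_ZN3foo3barEv".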
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
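+// Objective-C methods are named "-[Class(Category) selector:]" for instance
+// methods and "+[Class(Category) selector:]" for class methods; the helpers
+// below parse names of that form.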
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name)) return false;
+
+ size_t pos = Name.find(')');
+ if (pos != std::string::npos) {
+ if (Name[pos+1] != ' ') return false;
+ return true;
+ }
+ return false;
+}
+
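+// For example, given "+[NSString(Category) myMethod:]" this returns
+// Class = "NSString" and Category = "NSString(Category)".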
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+ return;
+}
+
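+// For example, "-[NSString stringWithCString:]" yields "stringWithCString:".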
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
+// Add the various names to the Dwarf accelerator table names.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+ DIE* Die) {
+ if (!SP.isDefinition()) return;
+
+ TheCU->addAccelName(SP.getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+ TheCU->addAccelName(SP.getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP.getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP.getName(), Class, Category);
+ TheCU->addAccelObjC(Class, Die);
+ if (Category != "")
+ TheCU->addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ }
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
+ const MDNode *SPNode) {
+ DIE *SPDie = SPCU->getDIE(SPNode);
+
+ assert(SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+
+ // If we're updating an abstract DIE, then we will be adding the children and
+ // object pointer later on. But what we don't want to do is process the
+ // concrete DIE twice.
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
+ // Pick up abstract subprogram DIE.
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
+ SPCU->addDie(SPDie);
+ } else {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (!SPDecl.isSubprogram()) {
+      // There is no need to generate a specification DIE for a function
+      // defined at compile unit level. If a function is defined inside
+      // another function then gdb prefers the definition at the top level
+      // and does not expect a specification DIE in the parent function. So
+      // avoid creating a specification DIE for a function defined inside a
+      // function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
+ if (ATy.isObjectPointer())
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, Arg);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
+ }
+
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
+ Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
+ Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber()));
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+  // Add the name to the name table; we do this here because we're guaranteed
+  // to have concrete versions of our DW_TAG_subprogram nodes.
+ addSubprogramNames(SPCU, SP, SPDie);
+
+ return SPDie;
+}
+
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
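+    // A pair of null symbols terminates the list; the range emitter later
+    // turns it into the 0,0 end-of-list entry in .debug_ranges.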
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ MCSymbol *End = getLabelAfterInsn(RI->second);
+
+ if (End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
+
+ return ScopeDIE;
+}
+
+// This scope represents the inlined body of a function. Construct a DIE to
+// represent this concrete inlined copy of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+ assert(Ranges.empty() == false &&
+ "LexicalScope does not have instruction markers!");
+
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
+ return NULL;
+ }
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+ if (StartLabel == 0 || EndLabel == 0) {
+ llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
+ }
+ assert(StartLabel->isDefined() &&
+ "Invalid starting label for an inlined scope!");
+ assert(EndLabel->isDefined() &&
+ "Invalid end label for an inlined scope!");
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
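+    // A pair of null symbols terminates the list; the range emitter later
+    // turns it into the 0,0 end-of-list entry in .debug_ranges.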
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ } else {
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
+ }
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+ // Track the start label for this inlined function.
+  // The .debug_inlined section specification does not clearly state how to
+  // emit an inlined scope that is split into multiple instruction ranges.
+  // For now, use the first instruction range and emit the low_pc/high_pc
+  // pair and the corresponding .debug_inlined section entry for this pair.
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP);
+ } else
+ I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
+ DILocation DL(Scope->getInlinedAt());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+ TheCU->getUniqueID()));
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+  // Add the name to the name table; we do this here because we're guaranteed
+  // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ DIScope DS(Scope->getScopeNode());
+  // Early return to avoid creating dangling variable or scope DIEs.
+ if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+ !TheCU->getDIE(DS))
+ return NULL;
+
+ SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = NULL;
+
+ // Collect arguments for current function.
+ if (LScopes.isCurrentFunctionScope(Scope))
+ for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+ if (DbgVariable *ArgDV = CurrentFnArguments[i])
+ if (DIE *Arg =
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
+ Children.push_back(Arg);
+ if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+ }
+
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable =
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
+ Children.push_back(Variable);
+ if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+ }
+ const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
+ Children.push_back(Nested);
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
+ else if (DS.isSubprogram()) {
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
+ ScopeDIE = TheCU->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+    } else
+      ScopeDIE = updateSubprogramScopeDIE(TheCU, DS);
+  } else {
+    // There is no need to emit an empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
+ ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ }
+
+ if (!ScopeDIE) return NULL;
+
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
+
+ if (DS.isSubprogram() && ObjectPointer != NULL)
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, ObjectPointer);
+
+ if (DS.isSubprogram())
+ TheCU->addPubTypes(DISubprogram(DS));
+
+ return ScopeDIE;
+}
+
+// Look up the source id with the given directory and source file names.
+// If none currently exists, create a new id and insert it in the
+// SourceIds map. This can update DirectoryNames and SourceFileNames maps
+// as well.
+unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName, unsigned CUID) {
+ // If we use .loc in assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ CUID = 0;
+
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return getOrCreateSourceID("<stdin>", StringRef(), CUID);
+
+ // TODO: this might not belong here. See if we can factor this better.
+ if (DirName == CompilationDir)
+ DirName = "";
+
+ // FileIDCUMap stores the current ID for the given compile unit.
+ unsigned SrcId = FileIDCUMap[CUID] + 1;
+
+ // We look up the CUID/file/dir by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+  NamePair += utostr(CUID);
+ NamePair += '\0';
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
+
+ FileIDCUMap[CUID] = SrcId;
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
+
+ return SrcId;
+}
+
+// Create new CompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &InfoHolder);
+
+ FileIDCUMap[NewCU->getUniqueID()] = 0;
+  // Call this to emit a .file directive if one hasn't yet been emitted for
+  // the source file this CU comes from.
+ getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
+ NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+ NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU->addString(Die, dwarf::DW_AT_name, FN);
+  // DWARF section 2.17.1 requires that we use DW_AT_low_pc for a single
+  // entry point into an entity. We're using 0 (or a NULL label) for this.
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+ NewCU->getUniqueID());
+ Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+ NewCU->getUniqueID());
+
+  // DW_AT_stmt_list is an offset of line number information for this
+  // compile unit in the debug_line section.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ NewCU->getUniqueID() == 0 ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (NewCU->getUniqueID() == 0)
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
+
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ if (DIUnit.isOptimized())
+ NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+
+ if (unsigned RVer = DIUnit.getRunTimeVersion())
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ if (!FirstCU)
+ FirstCU = NewCU;
+
+ InfoHolder.addUnit(NewCU);
+
+ CUMap.insert(std::make_pair(N, NewCU));
+ return NewCU;
+}
+
+// Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+ const MDNode *N) {
+ CompileUnit *&CURef = SPMap[N];
+ if (CURef)
+ return;
+ CURef = TheCU;
+
+ DISubprogram SP(N);
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+    // the class type.
+ return;
+
+ DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
+
+ // Add to map.
+ TheCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global, if requested.
+ if (GenerateDwarfPubNamesSection)
+ TheCU->addGlobalName(SP.getName(), SubprogramDie);
+}
+
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule() {
+ if (DisableDebugInfoPrinting)
+ return;
+
+ const Module *M = MMI->getModule();
+
+ // If module has named metadata anchors then use them, otherwise scan the
+ // module using debug info finder to collect debug info.
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes)
+ return;
+
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    // If we're splitting the dwarf out, now that we've got the entire CU,
+    // construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+      CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+                  dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(CUNode);
+ }
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+// Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+void DwarfDebug::computeInlinedDIEs() {
+  // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+}
+
+// Collect info for variables that were optimized out.
+void DwarfDebug::collectDeadVariables() {
+ const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
+
+ if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit TheCU(CU_Nodes->getOperand(i));
+ DIArray Subprograms = TheCU.getSubprograms();
+ for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+ DISubprogram SP(Subprograms.getElement(i));
+ if (ProcessedSPNodes.count(SP) != 0) continue;
+ if (!SP.Verify()) continue;
+ if (!SP.isDefinition()) continue;
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0) continue;
+
+ LexicalScope *Scope =
+ new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+ DeadFnScopeMap[SP] = Scope;
+
+ // Construct subprogram DIE and add variables DIEs.
+ CompileUnit *SPCU = CUMap.lookup(TheCU);
+ assert(SPCU && "Unable to find Compile Unit!");
+ constructSubprogramDIE(SPCU, SP);
+ DIE *ScopeDIE = SPCU->getDIE(SP);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ if (!DV.Verify()) continue;
+ DbgVariable *NewVar = new DbgVariable(DV, NULL);
+ if (DIE *VariableDIE =
+ SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
+ }
+ DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ // Collect info for variables that were optimized out.
+ collectDeadVariables();
+
+  // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ computeInlinedDIEs();
+
+  // Emit the DW_AT_containing_type attribute to connect types with their
+  // vtable-holding type.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+ CUE = CUMap.end(); CUI != CUE; ++CUI) {
+ CompileUnit *TheCU = CUI->second;
+ TheCU->constructContainingTypeDIEs();
+ }
+
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
+ // Standard sections final addresses.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+ // End text sections.
+ for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+ Asm->OutStreamer.SwitchSection(SectionMap[I]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
+ }
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+
+ if (!FirstCU) return;
+
+ // End any existing sections.
+ // TODO: Does this need to happen?
+ endSections();
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ if (!useSplitDwarf()) {
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+    // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ } else {
+ // TODO: Fill this in for separated debug sections and separate
+ // out information into new sections.
+
+ // Emit the debug info section and compile units.
+ emitDebugInfo();
+ emitDebugInfoDWO();
+
+    // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+ emitDebugAbbrevDWO();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit DWO addresses.
+ InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ }
+
+ // Emit info into the dwarf accelerator table sections.
+ if (useDwarfAccelTables()) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
+ // Emit info into a debug pubnames section, if requested.
+ if (GenerateDwarfPubNamesSection)
+ emitDebugPubnames();
+
+ // Emit info into a debug pubtypes section.
+ // TODO: When we don't need the option anymore we can
+ // remove all of the code that adds to the table.
+ if (useDarwinGDBCompat())
+ emitDebugPubTypes();
+
+ // Finally emit string information into a string table.
+ emitDebugStr();
+ if (useSplitDwarf())
+ emitDebugStrDWO();
+
+  // Clean up.
+ SPMap.clear();
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+
+ for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+ E = SkeletonCUs.end(); I != E; ++I)
+ delete *I;
+
+ // Reset these for the next Module if we have one.
+ FirstCU = NULL;
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+ DebugLoc ScopeLoc) {
+ LLVMContext &Ctx = DV->getContext();
+  // More than one inlined variable corresponds to one abstract variable.
+ DIVariable Var = cleanseInlinedVariable(DV, Ctx);
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, NULL);
+ addScopeVariable(Scope, AbsDbgVariable);
+ AbstractVariables[Var] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+// If Var is a current function argument then add it to CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope) {
+ if (!LScopes.isCurrentFunctionScope(Scope))
+ return false;
+ DIVariable DV = Var->getVariable();
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ return false;
+ unsigned ArgNo = DV.getArgNumber();
+ if (ArgNo == 0)
+ return false;
+
+ size_t Size = CurrentFnArguments.size();
+ if (Size == 0)
+ CurrentFnArguments.resize(MF->getFunction()->arg_size());
+  // llvm::Function's argument count is not a good indicator of how many
+  // arguments the function has at the source level.
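+  // Over-allocate so that later, higher-numbered arguments do not force a
+  // resize on every new argument.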
+ if (ArgNo > Size)
+ CurrentFnArguments.resize(ArgNo * 2);
+ CurrentFnArguments[ArgNo - 1] = Var;
+ return true;
+}
+
+// Collect variable information from side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ Processed.insert(Var);
+ DIVariable DV(Var);
+ const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+ LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+
+ // If variable scope is not found then skip this variable.
+ if (Scope == 0)
+ continue;
+
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+ RegVar->setFrameIndex(VP.first);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsDbgVariable)
+ AbsDbgVariable->setFrameIndex(VP.first);
+ }
+}
+
+// Return true if the debug value, encoded by a DBG_VALUE instruction, is in
+// a defined reg.
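+// Such a DBG_VALUE has three operands: the register, an offset immediate
+// that must be zero, and the variable metadata; a register of 0 is used
+// elsewhere to mark the point where the value becomes unavailable.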
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ return MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+ MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
+
+// Get .debug_loc entry for the instruction range starting at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+}
+
+// Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+
+  // Collect variable information from the side table maintained by MMI.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ for (SmallVectorImpl<const MDNode*>::const_iterator
+ UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+ ++UVI) {
+ const MDNode *Var = *UVI;
+ if (Processed.count(Var))
+ continue;
+
+ // History contains relevant DBG_VALUE instructions for Var and instructions
+ // clobbering it.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty())
+ continue;
+ const MachineInstr *MInsn = History.front();
+
+ DIVariable DV(Var);
+ LexicalScope *Scope = NULL;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ Scope = LScopes.getCurrentFunctionScope();
+ else if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(DV);
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+ DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
+ DbgVariable *RegVar = new DbgVariable(DV, AbsVar);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsVar)
+ AbsVar->setMInsn(MInsn);
+
+ // Simplify ranges that are fully coalesced.
+ if (History.size() <= 1 || (History.size() == 2 &&
+ MInsn->isIdenticalTo(History.back()))) {
+ RegVar->setMInsn(MInsn);
+ continue;
+ }
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+
+ for (SmallVectorImpl<const MachineInstr*>::const_iterator
+ HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+ const MachineInstr *Begin = *HI;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if DBG_VALUE is truncating a range.
+ if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
+ && !Begin->getOperand(0).getReg())
+ continue;
+
+ // Compute the range for a register location.
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = 0;
+
+ if (HI + 1 == HE)
+ // If Begin is the last instruction in History then its value is valid
+ // until the end of the function.
+ SLabel = FunctionEndSym;
+ else {
+ const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
+ if (End->isDebugValue())
+ SLabel = getLabelBeforeInsn(End);
+ else {
+ // End is a normal instruction clobbering the range.
+ SLabel = getLabelAfterInsn(End);
+ assert(SLabel && "Forgot label after clobber instruction");
+ ++HI;
+ }
+ }
+
+ // The value is valid until the next DBG_VALUE or clobber.
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
+ Begin));
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
+
+ // Collect info for variables that were optimized out.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !Processed.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+}
+
+// Return Label preceding the instruction.
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+// Return Label immediately following the instruction.
+MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+// Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ }
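+      // Once past the prologue end (PrologEndLoc is cleared above), each
+      // location change is also marked as a statement boundary.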
+ if (PrologEndLoc.isUnknown())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else
+ recordSourceLine(0, 0, 0, 0);
+ }
+ }
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+// Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!MI->isDebugValue())
+ PrevLabel = 0;
+
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsAfterInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+// Each LexicalScope has a first and a last instruction marking the beginning
+// and the end of the scope, respectively. Create an inverse map listing the
+// scopes that start (and end) at each instruction. One instruction may start
+// (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVector<LexicalScope *, 4> &Children = S->getChildren();
+ if (!Children.empty())
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+
+ const SmallVector<InsnRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "InsnRange does not have first instruction!");
+ assert(RI->second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(RI->first);
+ requestLabelAfterInsn(RI->second);
+ }
+ }
+}
+
+// Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
+ if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+ return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+ return DL.getScope(Ctx);
+}
+
+// Walk up the scope chain of the given debug loc and find the line number
+// info for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+ const MDNode *Scope = getScopeNode(DL, Ctx);
+ DISubprogram SP = getDISubprogram(Scope);
+ if (SP.Verify()) {
+    // Check the number of operands, since that compatibility check is
+    // cheap here.
+ if (SP->getNumOperands() > 19)
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ else
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ }
+
+ return DebugLoc();
+}
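+
+// For illustration (hypothetical metadata): for a DebugLoc taken from inside
+// an inlined body, getScopeNode() chases the inlinedAt links out to the
+// scope of the outermost call site, so the DebugLoc returned above carries
+// the enclosing function's scope (or declared) line rather than a line
+// inside the inlined body.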
+
+// Gather pre-function debug information. Assumes it is called immediately
+// after the function entry point has been emitted.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo()) return;
+ LScopes.initialize(*MF);
+ if (LScopes.empty()) return;
+ identifyScopeMarkers();
+
+ // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+ // belongs to.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
+  // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ // LiveUserVar - Map physreg numbers to the MDNode they contain.
+ std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool AtBlockEntry = true;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+
+ if (MI->isDebugValue()) {
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
+
+ // Keep track of user variables.
+ const MDNode *Var =
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+        // Variable is in a register; we need to check for clobbers.
+ if (isDbgValueInDefinedReg(MI))
+ LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+ // Check the history of this variable.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty()) {
+ UserVariables.push_back(Var);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(Var);
+ if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(getDISubprogram(DV.getContext()))
+ .describes(MF->getFunction()))
+ LabelsBeforeInsn[MI] = FunctionBeginSym;
+ } else {
+ // We have seen this variable before. Try to coalesce DBG_VALUEs.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue()) {
+ // Coalesce identical entries at the end of History.
+ if (History.size() >= 2 &&
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
+ History.pop_back();
+ }
+
+          // Terminate old register assignments that don't reach MI.
+ MachineFunction::const_iterator PrevMBB = Prev->getParent();
+ if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+ isDbgValueInDefinedReg(Prev)) {
+ // Previous register assignment needs to terminate at the end of
+ // its basic block.
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end()) {
+ // Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
+ History.pop_back();
+              } else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ }
+ }
+ History.push_back(MI);
+ } else {
+ // Not a DBG_VALUE instruction.
+ if (!MI->isLabel())
+ AtBlockEntry = false;
+
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ if (!MI->getFlag(MachineInstr::FrameSetup) &&
+ (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
+ PrologEndLoc = MI->getDebugLoc();
+
+ // Check if the instruction clobbers any registers with debug vars.
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+ continue;
+ for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+ AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ const MDNode *Var = LiveUserVar[Reg];
+ if (!Var)
+ continue;
+ // Reg is now clobbered.
+ LiveUserVar[Reg] = 0;
+
+          // Was Var last defined by a DBG_VALUE referring to Reg?
+ DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+ if (HistI == DbgValues.end())
+ continue;
+ SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ if (History.empty())
+ continue;
+ const MachineInstr *Prev = History.back();
+ // Sanity-check: Register assignments are terminated at the end of
+ // their block.
+ if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+ continue;
+ // Is the variable still in Reg?
+ if (!isDbgValueInDefinedReg(Prev) ||
+ Prev->getOperand(0).getReg() != Reg)
+ continue;
+ // Var is clobbered. Make sure the next instruction gets a label.
+ History.push_back(MI);
+ }
+ }
+ }
+ }
+ }
+
+ for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+ I != E; ++I) {
+ SmallVectorImpl<const MachineInstr*> &History = I->second;
+ if (History.empty())
+ continue;
+
+ // Make sure the final register assignments are terminated.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+ const MachineBasicBlock *PrevMBB = Prev->getParent();
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end())
+ // Drop DBG_VALUE for empty range.
+ History.pop_back();
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ // Request labels for the full history.
+ for (unsigned i = 0, e = History.size(); i != e; ++i) {
+ const MachineInstr *MI = History[i];
+ if (MI->isDebugValue())
+ requestLabelBeforeInsn(MI);
+ else
+ requestLabelAfterInsn(MI);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = FunctionBeginSym;
+
+ // Record beginning of function.
+ if (!PrologEndLoc.isUnknown()) {
+ DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+ MF->getFunction()->getContext());
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ DWARF2_FLAG_IS_STMT);
+ }
+}
+
+void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  ScopeVariables[LS].push_back(Var);
+}
+
+// Gather and emit post-function debug information.
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+  // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+ // Set DwarfCompileUnitID in MCContext to default value.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+
+ // Construct abstract scopes.
+ ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
+ for (unsigned i = 0, e = AList.size(); i != e; ++i) {
+ LexicalScope *AScope = AList[i];
+ DISubprogram SP(AScope->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
+ continue;
+ // Check that DbgVariable for DV wasn't created earlier, when
+ // findAbstractVariable() was called for inlined instance of DV.
+ LLVMContext &Ctx = DV->getContext();
+ DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+ if (AbstractVariables.lookup(CleanDV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+ }
+ if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
+ constructScopeDIE(TheCU, AScope);
+ }
+
+ DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
+
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
+ TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
+
+ // Clear debug info
+ for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator
+ I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
+ DeleteContainerPointers(I->second);
+ ScopeVariables.clear();
+ DeleteContainerPointers(CurrentFnArguments);
+ UserVariables.clear();
+ DbgValues.clear();
+ AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = NULL;
+}
+
+// Register a source line with debug info. Emits a .loc directive that
+// provides the correspondence to the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF(S);
+ Fn = DBF.getFilename();
+ Dir = DBF.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ llvm_unreachable("Unexpected scope info");
+
+ Src = getOrCreateSourceID(Fn, Dir,
+ Asm->OutStreamer.getContext().getDwarfCompileUnitID());
+ }
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+// Compute the size and offset of a DIE.
+unsigned
+DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+// Compute the size and offset of all the DIEs.
+void DwarfUnits::computeSizeAndOffsets() {
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
+ unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
+ }
+}
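+
+// A sketch of the arithmetic above: every unit is preceded by an 11-byte
+// header (4-byte length + 2-byte version + 4-byte abbrev offset + 1-byte
+// pointer size). If the first CU's DIE tree sizes to S0 bytes, the second
+// CU is assigned debug-info offset 11 + S0, and so on cumulatively.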
+
+// Emit initial Dwarf sections with a label at the start of each one.
+void DwarfDebug::emitSectionLabels() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ DwarfInfoSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ DwarfAbbrevSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ if (useSplitDwarf())
+ DwarfAbbrevDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
+ "section_abbrev_dwo");
+ emitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+ if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+ emitSectionSym(Asm, MacroInfo);
+
+ DwarfLineSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ if (GenerateDwarfPubNamesSection)
+ emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ DwarfStrSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+ if (useSplitDwarf())
+ DwarfStrDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
+
+ DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
+ TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ emitSectionSym(Asm, TLOF.getDataSection());
+}
+
+// Recursively emits a debug information entry.
+void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1);
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ Asm->EmitULEB128(AbbrevNumber);
+
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ case dwarf::DW_AT_ranges: {
+      // DW_AT_ranges Value encodes an offset into the debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
+ break;
+ }
+ case dwarf::DW_AT_location: {
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ Asm->EmitLabelReference(L->getValue(), 4);
+ else
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else {
+ Values[i]->EmitValue(Asm, Form);
+ }
+ break;
+ }
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j], Abbrevs);
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("End Of Children Mark");
+ Asm->EmitInt8(0);
+ }
+}
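+
+// With verbose assembly enabled, the emission above produces annotated
+// output roughly like the following (an illustrative sketch, not literal
+// compiler output):
+//   .byte 1    ; Abbrev [1] 0xb:0x5b DW_TAG_compile_unit
+//   ...        ; one value per attribute, in abbreviation order
+//   .byte 0    ; End Of Children Mark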
+
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfUnits::emitUnits(DwarfDebug *DD,
+ const MCSection *USection,
+ const MCSection *ASection,
+ const MCSymbol *ASectionSym) {
+ Asm->OutStreamer.SwitchSection(USection);
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ DIE *Die = TheCU->getCUDie();
+
+    // Emit the compile unit's header.
+ Asm->OutStreamer
+ .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
+ TheCU->getUniqueID()));
+
+    // Emit the size of the content, not including the length itself.
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
+ ASectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+ DD->emitDIE(Die, Abbreviations);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
+ TheCU->getUniqueID()));
+ }
+}
+
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ DwarfAbbrevSectionSym);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ if (!useSplitDwarf())
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ &Abbreviations);
+ else
+ emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAbbrevs(const MCSection *Section,
+ std::vector<DIEAbbrev *> *Abbrevs) {
+ // Check to see if it is worth the effort.
+ if (!Abbrevs->empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(Section);
+
+ MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
+ Asm->OutStreamer.EmitLabel(Begin);
+
+    // For each abbreviation.
+ for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbrevs->at(i);
+
+      // Emit the abbreviation's code (base-1 index).
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+
+ MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
+ Asm->OutStreamer.EmitLabel(End);
+ }
+}
+
+// Emit the last address of the section and the end of the line matrix.
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->OutStreamer.AddComment("Extended Op");
+ Asm->EmitInt8(0);
+
+ Asm->OutStreamer.AddComment("Op size");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+ Asm->OutStreamer.AddComment("DW_LNE_set_address");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+ Asm->OutStreamer.AddComment("Section end label");
+
+ Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+ Asm->getDataLayout().getPointerSize());
+
+ // Mark end of matrix.
+ Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
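+
+// Byte for byte, on a target with 8-byte pointers the sequence above encodes
+// (a sketch; the opcode values come from the DWARF standard):
+//   0x00 0x09 0x02 <8-byte section_end address>   // DW_LNE_set_address
+//   0x00 0x01 0x01                                // DW_LNE_end_sequence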
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "Names");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit objective C classes and categories into a hashed accelerator table
+// section.
+void DwarfDebug::emitAccelObjC() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "ObjC");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelObjCSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit namespace dies into a hashed accelerator table.
+void DwarfDebug::emitAccelNamespaces() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
+
+ AT.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelNamespaceSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit type dies into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ std::vector<DwarfAccelTable::Atom> Atoms;
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ dwarf::DW_FORM_data2));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ dwarf::DW_FORM_data1));
+ DwarfAccelTable AT(Atoms);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
+ = TheCU->getAccelTypes();
+ for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
+ for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
+             = Entities.begin(), DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI).first, (*DI).second);
+ }
+ }
+
+ AT.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelTypesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+/// emitDebugPubnames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubnames() {
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+
+ typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
+ for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ unsigned ID = TheCU->getUniqueID();
+
+ if (TheCU->getGlobalNames().empty())
+ continue;
+
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
+ Asm->GetTempSymbol("pubnames_begin", ID), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+ }
+}
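+
+// Schematically, the directives above give each CU's pubnames contribution
+// the following shape (a sketch, not literal assembler output):
+//   .long  pubnames_end - pubnames_begin   // Length of Public Names Info
+//   .short DWARF version
+//   .long  offset of the CU in .debug_info
+//   .long  length of the CU
+//   { .long DIE offset; .asciz "name" }*   // one pair per global
+//   .long  0                               // End Mark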
+
+void DwarfDebug::emitDebugPubTypes() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getUniqueID()));
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
+ TheCU->getUniqueID()),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ // Emit the name with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getUniqueID()));
+ }
+}
+
+// Emit strings into a string section.
+void DwarfUnits::emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection = NULL,
+ const MCSymbol *StrSecSym = NULL) {
+
+ if (StringPool.empty()) return;
+
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(StrSection);
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+ for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+ I = StringPool.begin(), E = StringPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &*I));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+ // Emit the string itself with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+ Entries[i].second->getKeyLength()+1));
+ }
+
+ // If we've got an offset section go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm->OutStreamer.SwitchSection(OffsetSection);
+ unsigned offset = 0;
+ unsigned size = 4; // FIXME: DWARF64 is 8.
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitIntValue(offset, size);
+ offset += Entries[i].second->getKeyLength() + 1;
+ }
+ }
+}
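+
+// For example (a sketch): a pool holding "foo" (ID 0) and "bar" (ID 1)
+// emits the two null-terminated strings into StrSection in ID order and,
+// when OffsetSection is given, the 4-byte offsets 0 and 4 (strlen("foo")
+// plus the terminating null).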
+
+// Emit addresses into an address section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+ if (AddressPool.empty()) return;
+
+ // Start the dwarf addr section.
+ Asm->OutStreamer.SwitchSection(AddrSection);
+
+  // Get all of the address pool entries and put them in an array by their
+  // ID so we can sort them.
+ SmallVector<std::pair<unsigned,
+ std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+ for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+ I = AddressPool.begin(), E = AddressPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit the address of the entry, or 0 if the symbol is null.
+ MCSymbol *Sym = Entries[i].second->first;
+ if (Sym)
+ Asm->EmitLabelReference(Entries[i].second->first,
+ Asm->getDataLayout().getPointerSize());
+ else
+ Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+ }
+}
+
+// Emit visible names into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+}
+
+// Emit location lists into a debug loc section.
+void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
+ DIVariable DV(Entry.Variable);
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ if (Entry.isInt()) {
+ DIBasicType BTy(DV.getType());
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+ Asm->OutStreamer.AddComment("DW_OP_consts");
+ Asm->EmitInt8(dwarf::DW_OP_consts);
+ Asm->EmitSLEB128(Entry.getInt());
+ } else {
+ Asm->OutStreamer.AddComment("DW_OP_constu");
+ Asm->EmitInt8(dwarf::DW_OP_constu);
+ Asm->EmitULEB128(Entry.getInt());
+ }
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Entry.Loc.isReg())
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ } else
+ llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
+ }
+      // else ... ignore constant fp. There is no good way to represent
+      // them in DWARF.
+ Asm->OutStreamer.EmitLabel(end);
+ }
+ }
+}
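+
+// The emitted location list therefore has the classic .debug_loc shape
+// (a sketch):
+//   .quad  Begin                // start address (pointer-sized)
+//   .quad  End                  // end address
+//   .short end - begin          // Loc expr size
+//   <DWARF expression bytes>    // e.g. DW_OP_consts or a register op
+// with a pointer-sized 0,0 pair terminating each variable's list.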
+
+// Emit the debug aranges section.
+void DwarfDebug::emitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+// Emit address ranges into a debug ranges section.
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+ for (SmallVectorImpl<const MCSymbol *>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ }
+}
+
+// Emit the debug macinfo section.
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+// Emit inline info using the following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for the MIPS linkage name, if it
+//    exists; otherwise the offset into __debug_str for the regular
+//    function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+// instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!Asm->MAI->doesDwarfUseInlineInfoSection())
+ return;
+
+ if (!FirstCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+ Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+ Asm->OutStreamer.AddComment("Dwarf Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+ for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ Asm->OutStreamer.AddComment("MIPS linkage name");
+ if (LName.empty())
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
+ else
+ Asm->EmitSectionOffset(InfoHolder
+ .getStringPoolEntry(getRealLinkageName(LName)),
+ DwarfStrSectionSym);
+
+ Asm->OutStreamer.AddComment("Function name");
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
+ Asm->EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(LI->second->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+ Asm->OutStreamer.EmitSymbolValue(LI->first,
+ Asm->getDataLayout().getPointerSize());
+ }
+ }
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &SkeletonHolder);
+
+ NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit.getSplitDebugFilename());
+
+ // This should be a unique identifier when we want to build .dwp files.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+ // FIXME: The addr base should be relative for each compile unit, however,
+ // this one is going to be 0 anyhow.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+
+  // Section 2.17.1 of the DWARF standard requires that we use DW_AT_low_pc
+  // for a single entry point into an entity; we use 0 (a NULL label) here.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
+  // DW_AT_stmt_list is an offset into the debug_line section holding the
+  // line number information for this compile unit.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+ DwarfLineSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
+
+ if (!CompilationDir.empty())
+ NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ SkeletonHolder.addUnit(NewCU);
+ SkeletonCUs.push_back(NewCU);
+
+ return NewCU;
+}
+
+void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ emitAbbrevs(Section, &SkeletonAbbrevs);
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ DwarfAbbrevDWOSectionSym);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ &Abbreviations);
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ const MCSection *OffSec = Asm->getObjFileLowering()
+ .getDwarfStrOffDWOSection();
+ const MCSymbol *StrSym = DwarfStrSectionSym;
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec, StrSym);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 0000000..81e345e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,649 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class CompileUnit;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MachineOperand;
+class MCAsmInfo;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+class DwarfDebug;
+
+//===----------------------------------------------------------------------===//
+/// \brief This class is used to record source line correspondence.
+class SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+  MCSymbol *Label; // Label in the code.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+ : Line(L), Column(C), SourceID(S), Label(label) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ MCSymbol *getLabel() const { return Label; }
+};
+
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
+struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ const MDNode *Variable;
+ bool Merged;
+ bool Constant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
+ Constant(false) { Constants.Int = 0;}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+  /// \brief Empty entries are also used as a trigger to emit a temp label.
+  /// Such labels are referenced by DIEs to find the debug_loc offset for a
+  /// given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
+};
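+
+// Usage sketch (hypothetical values): getDebugLocEntry in DwarfDebug.cpp
+// builds one entry per DBG_VALUE range, e.g. for an integer constant:
+//   DotDebugLocEntry E(FLabel, SLabel, /*i=*/42);
+// and emitDebugLoc later dispatches on EntryKind to pick the DWARF
+// expression opcodes (DW_OP_consts/DW_OP_constu vs. register ops).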
+
+//===----------------------------------------------------------------------===//
+/// \brief This class is used to track local variable information.
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+ DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
+ const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
+ int FrameIndex;
+public:
+ // AbsVar may be NULL.
+ DbgVariable(DIVariable V, DbgVariable *AV)
+ : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+ FrameIndex(~0) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ DbgVariable *getAbstractVariable() const { return AbsVar; }
+ const MachineInstr *getMInsn() const { return MInsn; }
+ void setMInsn(const MachineInstr *M) { MInsn = M; }
+ int getFrameIndex() const { return FrameIndex; }
+ void setFrameIndex(int FI) { FrameIndex = FI; }
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+ /// \brief Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (Var.isArtificial())
+ return true;
+ if (getType().isArtificial())
+ return true;
+ return false;
+ }
+
+ bool isObjectPointer() const {
+ if (Var.isObjectPointer())
+ return true;
+ if (getType().isObjectPointer())
+ return true;
+ return false;
+ }
+
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const;
+};
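+
+// A concrete instantiation appears in collectVariableInfo in DwarfDebug.cpp:
+//   DbgVariable *RegVar = new DbgVariable(DV, AbsVar); // AbsVar may be NULL
+// after which setMInsn() and setDotDebugLocOffset() are filled in as the
+// variable's locations are resolved.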
+
+
+// A String->Symbol mapping of strings used by indirect
+// references.
+typedef StringMap<std::pair<MCSymbol*, unsigned>,
+ BumpPtrAllocator&> StrPool;
+
+// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
+// references.
+typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
+
+/// \brief Collects and handles information specific to a particular
+/// collection of units.
+class DwarfUnits {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> *AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> *Abbreviations;
+
+ // A pointer to all units in the section.
+ SmallVector<CompileUnit *, 1> CUs;
+
+ // Collection of strings for this unit and assorted symbols.
+ StrPool StringPool;
+ unsigned NextStringPoolNumber;
+ std::string StringPref;
+
+ // Collection of addresses for this unit and assorted labels.
+ AddrPool AddressPool;
+ unsigned NextAddrPoolNumber;
+
+public:
+ DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
+ std::vector<DIEAbbrev *> *A, const char *Pref,
+ BumpPtrAllocator &DA) :
+ Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
+ StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+ AddressPool(), NextAddrPoolNumber(0) {}
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Define a unique number for the abbreviation.
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+
+ /// \brief Emit all of the units to the section listed with the given
+ /// abbreviation section.
+ void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
+ const MCSymbol *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+
+ /// \brief Emit all of the addresses to the section given.
+ void emitAddresses(const MCSection *);
+
+  /// \brief Returns the symbol for the start of the string pool.
+ MCSymbol *getStringPoolSym();
+
+ /// \brief Returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// \brief Returns the index into the string pool with the given
+ /// string text.
+ unsigned getStringPoolIndex(StringRef Str);
+
+ /// \brief Returns the string pool.
+ StrPool *getStringPool() { return &StringPool; }
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getAddrPoolIndex(MCSymbol *);
+
+ /// \brief Returns the address pool.
+ AddrPool *getAddrPool() { return &AddressPool; }
+
+  /// \brief For a given compile unit DIE, returns the offset from the
+  /// beginning of debug info.
+ unsigned getCUOffset(DIE *Die);
+};
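+
+// Construction sketch, mirroring how DwarfDebug wires its holders (the
+// "info_string" prefix is assumed from the labels in emitSectionLabels):
+//   DwarfUnits InfoHolder(Asm, &AbbreviationsSet, &Abbreviations,
+//                         "info_string", DIEValueAllocator);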
+
+/// \brief Collects and handles dwarf debug information.
+class DwarfDebug {
+ // Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ // Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ // All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ //===--------------------------------------------------------------------===//
+  // Attributes used to construct specific Dwarf sections.
+ //
+
+ CompileUnit *FirstCU;
+
+ // Maps MDNode with its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> CUMap;
+
+ // Maps subprogram MDNode with its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> SPMap;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ // Stores the current file ID for a given compile unit.
+ DenseMap <unsigned, unsigned> FileIDCUMap;
+ // Source id map, i.e. CUID, source filename and directory,
+ // separated by a zero byte, mapped to a unique id.
+ StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
+
+ // Provides a unique id per text section.
+ SetVector<const MCSection*> SectionMap;
+
+ // List of Arguments (DbgValues) for current function.
+ SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
+ LexicalScopes LScopes;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ // Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
+
+ // Collection of abstract variables.
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ // Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ // Collection of subprogram DIEs that are marked (at the end of the module)
+ // as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ // Keep track of inlined functions and their location. This
+ // information is used to populate the debug_inlined section.
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+ // This is a collection of subprogram MDNodes that are processed to
+ // create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ // Maps an instruction to the label emitted before it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ // Maps an instruction to the label emitted after it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ // Every user variable mentioned by a DBG_VALUE instruction in order of
+ // appearance.
+ SmallVector<const MDNode*, 8> UserVariables;
+
+ // For each user variable, keep a list of DBG_VALUE instructions in order.
+ // The list can also contain normal instructions that clobber the previous
+ // DBG_VALUE.
+ typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+ DbgValueHistoryMap;
+ DbgValueHistoryMap DbgValues;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+ // Previous instruction's location information. This is used to determine
+ // label locations that indicate scope boundaries in dwarf debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
+
+ // This location indicates the end of the function prologue and the
+ // beginning of the function body.
+ DebugLoc PrologEndLoc;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ // Section Symbols: these are assembler temporary labels that are emitted at
+ // the beginning of each supported dwarf section. These are used to form
+ // section offsets and are created by EmitSectionLabels.
+ MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+
+ // As an optimization, there is no need to emit an entry in the directory
+ // table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
+
+ // Counter for assigning globally unique IDs for CUs.
+ unsigned GlobalCUIndexCount;
+
+ // Holder for the file specific debug information.
+ DwarfUnits InfoHolder;
+
+ // Holders for the various debug information flags that we might need to
+ // have exposed. See accessor functions below for description.
+
+ // Whether or not we're emitting info for older versions of gdb on darwin.
+ bool IsDarwinGDBCompat;
+
+ // DWARF5 Experimental Options
+ bool HasDwarfAccelTables;
+ bool HasSplitDwarf;
+
+ // Separated Dwarf Variables
+ // In general these will all be for bits that are left in the
+ // original object file, rather than things that are meant
+ // to be in the .dwo sections.
+
+ // The CUs left in the original object file for separated debug info.
+ SmallVector<CompileUnit *, 1> SkeletonCUs;
+
+ // Used to uniquely define abbreviations for the skeleton emission.
+ FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> SkeletonAbbrevs;
+
+ // Holder for the skeleton information.
+ DwarfUnits SkeletonHolder;
+
+private:
+
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ /// \brief Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
+
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief This scope represents the inlined body of a function. Construct
+ /// a DIE to represent this concrete inlined copy of the function.
+ DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief Construct a DIE for this scope.
+ DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+
+ /// \brief Emit initial Dwarf sections with a label at the start of each one.
+ void emitSectionLabels();
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ void computeInlinedDIEs();
+
+ /// \brief Collect info for variables that were optimized out.
+ void collectDeadVariables();
+
+ /// \brief Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// \brief Emit labels to close any remaining sections that have been left
+ /// open.
+ void endSections();
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *);
+
+ /// \brief Emit the debug info section.
+ void emitDebugInfo();
+
+ /// \brief Emit the abbreviation section.
+ void emitAbbreviations();
+
+ /// \brief Emit the last address of the section and the end of
+ /// the line matrix.
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// \brief Emit visible names into a hashed accelerator table section.
+ void emitAccelNames();
+
+ /// \brief Emit objective C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// \brief Emit namespace dies into a hashed accelerator table.
+ void emitAccelNamespaces();
+
+ /// \brief Emit type dies into a hashed accelerator table.
+ void emitAccelTypes();
+
+ /// \brief Emit visible names into a debug pubnames section.
+ void emitDebugPubnames();
+
+ /// \brief Emit visible types into a debug pubtypes section.
+ void emitDebugPubTypes();
+
+ /// \brief Emit visible names into a debug str section.
+ void emitDebugStr();
+
+ /// \brief Emit visible names into a debug loc section.
+ void emitDebugLoc();
+
+ /// \brief Emit visible names into a debug aranges section.
+ void emitDebugARanges();
+
+ /// \brief Emit visible names into a debug ranges section.
+ void emitDebugRanges();
+
+ /// \brief Emit visible names into a debug macinfo section.
+ void emitDebugMacInfo();
+
+ /// \brief Emit inline info using custom format.
+ void emitDebugInlineInfo();
+
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// \brief Construct the split debug info compile unit for the debug info
+ /// section.
+ CompileUnit *constructSkeletonCU(const MDNode *);
+
+ /// \brief Emit the local split abbreviations.
+ void emitSkeletonAbbrevs(const MCSection *);
+
+ /// \brief Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// \brief Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// \brief Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// \brief Create new CompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
+ CompileUnit *constructCompileUnit(const MDNode *N);
+
+ /// \brief Construct subprogram DIE.
+ void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+
+ /// \brief Register a source line with debug info. Returns the unique
+ /// label that was emitted and which provides correspondence to the
+ /// source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// \brief Identify instructions that mark the beginning or end of a scope.
+ void identifyScopeMarkers();
+
+ /// \brief If Var is a current function argument, add it to the
+ /// CurrentFnArguments list.
+ bool addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope);
+
+ /// \brief Populate LexicalScope entries with variables' info.
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+ /// \brief Collect variable information from the side table maintained
+ /// by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &P);
+
+ /// \brief Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// \brief Return the label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// \brief Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// \brief Return the label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
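+
+ // The request*/get* pairs above follow a two-phase pattern: a label slot is
+ // reserved (with a null MCSymbol) while the function is scanned, and the
+ // actual symbol is created and bound later, when instructions are emitted.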
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug();
+
+ /// \brief Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule();
+
+ /// \brief Emit all Dwarf sections that should come after the content.
+ void endModule();
+
+ /// \brief Gather pre-function debug information.
+ void beginFunction(const MachineFunction *MF);
+
+ /// \brief Gather and emit post-function debug information.
+ void endFunction(const MachineFunction *MF);
+
+ /// \brief Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
+
+ /// \brief Process end of an instruction.
+ void endInstruction(const MachineInstr *MI);
+
+ /// \brief Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map.
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned CUID);
+
+ /// \brief Recursively emits a debug information entry.
+ void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
+
+ /// \brief Returns whether or not to tailor some of our debug
+ /// output to the limitations of darwin gdb.
+ bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+ // Experimental DWARF5 features.
+
+ /// \brief Returns whether or not to emit tables that dwarf consumers can
+ /// use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+
+ /// \brief Returns whether or not to restructure the current debug info to
+ /// support the split dwarf proposal.
+ bool useSplitDwarf() { return HasSplitDwarf; }
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 0000000..7133458
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,736 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+ : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
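+
+ // For example, with FilterIds == {1, 2} each id encodes in a single ULEB128
+ // byte, so FilterOffsets == {-1, -2} and the value written for a negative
+ // type ID equals the type ID itself; the two only diverge once an entry
+ // needs more than one byte (id >= 128).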
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
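+
+// A minimal sketch of how one landing pad's action-chain size adds up,
+// assuming no type-id sharing with a previous pad (the loop above also
+// handles that sharing); illustrative only, nothing calls it.
+static unsigned SketchActionChainSize(const SmallVectorImpl<int> &TypeValues) {
+ unsigned Size = 0, SizeAction = 0;
+ for (unsigned i = 0, e = TypeValues.size(); i != e; ++i) {
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(TypeValues[i]);
+ // Each record chains back to the previous one; 0 terminates the chain.
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ Size += SizeAction;
+ }
+ return Size;
+}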
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this), if some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SjLj doesn't do this.
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->isExceptionHandlingDwarf())
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
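+
+// For example, the sequence `invoke A; call B; invoke C' produces three
+// entries under DWARF EH: A's try-range with its landing pad, a null-pad
+// entry covering the gap that contains the potentially-throwing call B, and
+// C's try-range with its pad; a trailing nounwind call after C would get no
+// entry at all.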
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally, there is an index into
+/// the actions table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. The type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
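+
+ // For example, under DWARF EH two call sites whose action offsets each fit
+ // in one ULEB128 byte give 2 * (4 + 4 + 4) + 2 * 1 = 26 bytes.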
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getDataLayout().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ //  3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g.
+ // ARM EHABI). In this case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(GCCETSym);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()));
+
+ if (IsSJLJ)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+ Asm->getFunctionNumber()));
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does, instead output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
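+
+ // The padding is not emitted on its own; it is folded into the next ULEB128
+ // field via EmitULEB128's pad argument. For example, a TotalSize of 9 gives
+ // SizeAlign == 3, rounding the header and tables up to 12 bytes.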
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Index of the call site entry.
+ if (VerboseAsm) {
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
+ }
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // DWARF Exception handling
+ assert(Asm->MAI->isExceptionHandlingDwarf());
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (BeginLabel == 0)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (EndLabel == 0)
+ EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel) {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+ Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ }
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
+ Asm->OutStreamer.AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else if (Action.ValueForTypeID < 0)
+ Asm->OutStreamer.AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(" Cleanup");
+ }
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ if (VerboseAsm) {
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
+ }
+ }
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ EmitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
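+
+// A minimal sketch of the ULEB128 encoding that the size computations above
+// count bytes for: seven data bits per byte, high bit set on every byte
+// except the last. It mirrors what MCAsmInfo::getULEB128Size measures, but it
+// is not the streamer's actual emitter and nothing here calls it.
+static unsigned SketchEncodeULEB128(uint64_t Value, uint8_t *Buf) {
+ unsigned Count = 0;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // More bytes follow.
+ Buf[Count++] = Byte;
+ } while (Value != 0);
+ return Count;
+}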
+
+void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ llvm_unreachable("Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ llvm_unreachable("Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ llvm_unreachable("Should be implemented");
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 0000000..74b1b13
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,234 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+protected:
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
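+
+ // The ActionEntry records for one landing pad form a backward-linked chain:
+ // each record's NextAction holds a negative, self-relative byte offset to
+ // the previously emitted record, and a NextAction of zero ends the chain.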
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally, there is
+ /// an index into the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. The type id table contains references to all the C++ typeinfo for
+ /// all catches in the function. This table is reverse indexed, base 1.
+ void EmitExceptionTable();
+
+ virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(AsmPrinter *A);
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes it
+ /// is being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+ /// should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes it
+ /// is being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class ARMException : public DwarfException {
+ void EmitTypeInfos(unsigned TTypeEncoding);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ virtual ~ARMException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes it
+ /// is being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class Win64Exception : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if personality
+ /// info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes it
+ /// is being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 0000000..a8fb66d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGCPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+ MCStreamer &OS = AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ // Put this in a custom .note section.
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+ .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getDataRel()));
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ }
+
+ // Stack information never changes across safe points; only print info from
+ // the first call-site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+ MD.getFunction().arg_size() - RegisteredArgs : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
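+
+ // For example, on a 64-bit target a function taking 8 arguments passes 6 in
+ // registers, so StackArity is 2; with 6 or fewer arguments it is 0.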
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..98177c0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+ MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer.EmitLabel(Sym);
+}
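+
+// For example, a module identifier of "foo.ml" with Id == "code_begin" yields
+// the symbol "camlFoo__code_begin" (before any target-specific prefix the
+// Mangler adds).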
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from having stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize);
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Too many descriptors for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer.AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer.AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
+ }
+
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
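+
+// For illustration only: a hypothetical reader-side walk of the table laid
+// out above. This is a minimal sketch, assuming in-process access on the
+// emitting target (so pointer size matches IntPtrSize); it is not part of
+// the printer itself.
+//
+//   static uintptr_t AlignUp(uintptr_t P, uintptr_t A) {
+//     return (P + A - 1) & ~(A - 1);
+//   }
+//
+//   static void WalkFrametable(const uint16_t *Table) {
+//     uint16_t NumDescriptors = Table[0];
+//     // Descriptors begin at the next pointer-aligned address.
+//     uintptr_t P = AlignUp(uintptr_t(Table + 1), sizeof(void*));
+//     for (uint16_t i = 0; i != NumDescriptors; ++i) {
+//       void *ReturnAddress = *reinterpret_cast<void *const *>(P);
+//       const uint16_t *D =
+//           reinterpret_cast<const uint16_t *>(P + sizeof(void*));
+//       uint16_t FrameSize = D[0], NumLiveOffsets = D[1];
+//       // Live root offsets are D[2] .. D[2 + NumLiveOffsets - 1]; the next
+//       // descriptor starts at the following pointer-aligned address.
+//       P = AlignUp(uintptr_t(D + 2 + NumLiveOffsets), sizeof(void*));
+//       (void)ReturnAddress; (void)FrameSize;
+//     }
+//   }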
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
new file mode 100644
index 0000000..1561012
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -0,0 +1,114 @@
+//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Win64 Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+Win64Exception::Win64Exception(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
+ {}
+
+Win64Exception::~Win64Exception() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void Win64Exception::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void Win64Exception::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+
+ if (!shouldEmitPersonality)
+ return;
+
+ MCSymbol *GCCHandlerSym =
+ Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
+ Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void Win64Exception::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+
+ Asm->OutStreamer.PushSection();
+ Asm->OutStreamer.EmitWin64EHHandlerData();
+ Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
+ 4);
+ EmitExceptionTable();
+ Asm->OutStreamer.PopSection();
+ }
+ Asm->OutStreamer.EmitWin64EHEndProc();
+}
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 0000000..012ff8a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,466 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basictti"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+class BasicTTI : public ImmutablePass, public TargetTransformInfo {
+ const TargetLoweringBase *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ BasicTTI() : ImmutablePass(ID), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) {
+ initializeBasicTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual bool isLegalAddImmediate(int64_t imm) const;
+ virtual bool isLegalICmpImmediate(int64_t imm) const;
+ virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTypeLegal(Type *Ty) const;
+ virtual unsigned getJumpBufAlignment() const;
+ virtual unsigned getJumpBufSize() const;
+ virtual bool shouldBuildLookupTables() const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCFInstrCost(unsigned Opcode) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+ virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+ ArrayRef<Type*> Tys) const;
+ virtual unsigned getNumberOfParts(Type *Tp) const;
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
+
+ /// @}
+};
+
+}
+
+INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
+ "Target independent code generator's TTI", true, true, false)
+char BasicTTI::ID = 0;
+
+ImmutablePass *
+llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) {
+ return new BasicTTI(TLI);
+}
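+
+// Typical registration (illustrative; 'MyTargetMachine' is hypothetical):
+// targets add this pass from their addAnalysisPasses override, before any
+// target-specific TTI pass, so the latter can delegate to this layer.
+//
+//   void MyTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+//     PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+//   }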
+
+
+bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const {
+ TargetLoweringBase::AddrMode AM;
+ AM.BaseGV = BaseGV;
+ AM.BaseOffs = BaseOffset;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Scale;
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool BasicTTI::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned BasicTTI::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned BasicTTI::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
+
+bool BasicTTI::shouldBuildLookupTables() const {
+ return TLI->supportJumpTables() &&
+ (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) const {
+ assert (Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
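+
+// Worked example, assuming the default getVectorInstrCost of 1: for a
+// <4 x float> value with both Insert and Extract set, the estimate is
+// 4 inserts + 4 extracts = 8; targets that override getVectorInstrCost
+// change this accordingly.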
+
+unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
+ return 1;
+}
+
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+ return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+ return 1;
+}
+
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const {
+ // Check if any of the operands are vector operands.
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector costs we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ // Return the cost of multiple scalar invocations plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
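+
+// Worked example (hypothetical target with legal 128-bit vector adds): an
+// add on <4 x i32> has LT.first == 1 and a legal opcode, so it costs 1; on
+// <8 x i32> the type splits in two (LT.first == 2), so the estimate is
+// 2 * 2 = 4 to account for the splitting overhead.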
+
+unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return 1;
+}
+
+unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+
+ // Check for NOOP conversions.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Bitcasts between types that are legalized to the same type are free.
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+ }
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // If the cast is marked as legal (or promote) then assume low cost.
+ if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+ return 1;
+
+ // Handle scalar conversions.
+ if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+ // Scalar bitcasts are usually free.
+ if (Opcode == Instruction::BitCast)
+ return 0;
+
+ // Just check the op cost. If the operation is legal then assume it costs 1.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return 1;
+
+ // Assume that illegal scalar instructions are expensive.
+ return 4;
+ }
+
+ // Check vector-to-vector casts.
+ if (Dst->isVectorTy() && Src->isVectorTy()) {
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Assume that Zext is done using AND.
+ if (Opcode == Instruction::ZExt)
+ return 1;
+
+ // Assume that sext is done using SHL and SRA.
+ if (Opcode == Instruction::SExt)
+ return 2;
+
+ // Just check the op cost. If the operation is legal then assume it costs
+ // 1 and multiply by the type-legalization overhead.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return SrcLT.first * 1;
+ }
+
+ // If we are converting vectors and the operation is illegal, or
+ // if the vectors are legalized to different types, estimate the
+ // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // We already handled vector-to-vector and scalar-to-scalar conversions. This
+ // is where we handle bitcast between vectors and scalars. We need to assume
+ // that the conversion is scalarized in one way or another.
+ if (Opcode == Instruction::BitCast)
+ // Illegal bitcasts are done by storing and loading from a stack slot.
+ return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
+ (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
+
+ llvm_unreachable("Unhandled cast");
+}
+
+unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
+unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Otherwise, assume that the operation is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+ // Return the cost of multiple scalar invocations plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return 1;
+}
+
+unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ assert(!Src->isVoidTy() && "Invalid type");
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ // Assume that all memory operations on legal types cost 1.
+ return LT.first;
+}
+
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector costs we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
+}
+
+unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ return LT.first;
+}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+ return 0;
+}
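+
+// Consumer-side sketch (illustrative; 'MyPass' and 'VecTy' are hypothetical,
+// and the pass must addRequired<TargetTransformInfo>() in getAnalysisUsage):
+// clients query the top of the TTI analysis group, so a target-specific
+// implementation, when registered, is consulted before this basic one.
+//
+//   bool MyPass::runOnFunction(Function &F) {
+//     const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+//     unsigned Cost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
+//     ...
+//   }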
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..f8cc3b3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1725 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot
+// handle SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
+
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+ return Folder.OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+
+ EnableHoistCommonCode = CommonHoist;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // Drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+ RS = NULL;
+
+ // Use a RegScavenger to help update liveness when required.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS = new RegScavenger();
+ else
+ MRI.invalidateLiveness();
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI == 0) {
+ delete RS;
+ return MadeChange;
+ }
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ delete RS;
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ // Skip debug info so it will not affect codegen.
+ while (I->isDebugValue()) {
+ if (I==MBB->begin())
+ return 0; // MBB empty except for debug info.
+ --I;
+ }
+
+ return HashMachineInstr(I);
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ // Skip debugging pseudos; necessary to avoid changing the code.
+ while (I1->isDebugValue()) {
+ if (I1==MBB1->begin()) {
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin())
+ // I1==DBG at begin; I2==DBG at begin
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+ return TailLen;
+ }
+ --I1;
+ }
+ // I1==first (untested) non-DBG preceding known match
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin()) {
+ ++I1;
+ // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+ return TailLen;
+ }
+ --I2;
+ }
+ // I1, I2==first (untested) non-DBGs preceding known match
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ // Back past possible debugging pseudos at beginning of block. This matters
+ // when one block differs from the other only by whether debugging pseudos
+ // are present at the beginning. (This way, the various checks later for
+ // I1==MBB1->begin() work as expected.)
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+ --I2;
+ while (I2->isDebugValue()) {
+ if (I2 == MBB2->begin())
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ }
+ if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+ --I1;
+ while (I1->isDebugValue()) {
+ if (I1 == MBB1->begin())
+ return TailLen;
+ --I1;
+ }
+ ++I1;
+ }
+ return TailLen;
+}
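+
+// For illustration (hypothetical instruction streams): given
+//   MBB1: a; b; x; y; z      MBB2: c; x; y; z
+// the common tail is "x; y; z", so the return value is 3 and I1/I2 are
+// left pointing at "x" in their respective blocks.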
+
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls into the part starting at the
+/// iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(&CurMBB, NewMBB);
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ if (I->isCall())
+ Time += 10;
+ else if (I->mayLoad() || I->mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
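+
+// Worked example: a block with one call, two loads, and three ordinary
+// instructions is estimated at 10 + 2*2 + 3*1 = 17 units; debug values
+// contribute nothing.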
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
+}
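+
+// For example (hypothetical blocks): if CurMBB ends in "beq NextBB" where
+// NextBB is its layout successor, reversing the condition yields a single
+// "bne SuccBB" with a fall-through to NextBB, instead of appending an
+// unconditional "b SuccBB" after the conditional branch.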
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ if (getHash() > o.getHash())
+ return false;
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#else
+ return false;
+#endif
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
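+
+// Worked example: with the default tail-merge-size of 3, two blocks that
+// share only 2 instructions still merge when both had an unconditional
+// branch stripped (EffectiveTailLen == 3), and under optsize 2 common
+// instructions suffice when one block consists entirely of the tail.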
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // If the common tail includes any debug info we will take it pretty
+ // randomly from one of the inputs. Might be better to remove it?
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+ if (!EnableTailMerge) return MadeChange;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
+ if (I->succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+
+ // Skip blocks that loop to themselves, can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above.
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice to have
+ // a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ }
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ if (MBB->empty())
+ return true;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ return false;
+ }
+ return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator MBBI, MBBE;
+ for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ break;
+ }
+ return (MBBI->isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+ // If there is a clear successor ordering we make sure that one block
+ // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ // Neither block consists entirely of debug info (per IsEmptyBlock check),
+ // so we needn't test for falling off the beginning here.
+ MachineBasicBlock::iterator MBB1I = --MBB1->end();
+ while (MBB1I->isDebugValue())
+ --MBB1I;
+ MachineBasicBlock::iterator MBB2I = --MBB2->end();
+ while (MBB2I->isDebugValue())
+ --MBB2I;
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. Always use the DebugLoc of the first
+/// branching instruction found unless it's absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
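+
+ // For illustration (hypothetical branches), the transform above rewrites
+ // jcc L1 ; jmp L1
+ // as "jmp L1", or as a plain fallthrough when L1 is the next block (MBB).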
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(PrevBBIter))
+ break;
+ MachineInstr *DuplicateDbg = MBBIter;
+ ++MBBIter; --PrevBBIter;
+ DuplicateDbg->eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not), remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and to this
+ // block if the condition is false, remove the unconditional second branch.
+ if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
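+
+ // For illustration (hypothetical branches), with MBB as the next block,
+ // the transform above rewrites
+ // jcc Other ; jmp MBB
+ // as "jcc Other" followed by a fallthrough into MBB.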
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
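+
+ // For illustration (hypothetical branches), with MBB as the next block,
+ // the reversal above rewrites
+ // jcc MBB ; jmp Other
+ // as "jncc Other" followed by a fallthrough into MBB, when the target can
+ // reverse the condition.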
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps code with asserts in
+ // loops by moving the assert block out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PrevBB aren't both no-successor
+ // blocks. If neither has successors and if PrevBB is the second from
+ // last block in the function, we'd just keep swapping the two blocks for
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for this block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (!MBB->empty()) {
+ bool NonDebugInfoFound = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (!I->isDebugValue()) {
+ NonDebugInfoFound = true;
+ break;
+ }
+ }
+ if (!NonDebugInfoFound)
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases).
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while (PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator;
+/// however, if the terminator is a conditional branch and its previous
+/// instruction is the flag-setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch; try not to separate the
+ // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && PI->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ // If PI has a regmask operand, it is probably a call. Separate away.
+ if (MO.isRegMask())
+ return Loc;
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative: don't insert an instruction above anything that may have
+ // side-effects. And since it's potentially bad to separate the flag-setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above a predicated instruction, since it's hard to
+ // reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
+ } else {
+ if (Uses.erase(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
+ }
+ }
+
+ return PI;
+}
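+
+// For illustration of findHoistingInsertPosAndDeps (hypothetical machine
+// code): in a block ending with
+// cmp r0, r1 ; sets the flags the branch reads
+// bcc TBB
+// the returned position is before the cmp, so hoisted instructions cannot
+// clobber the flags between the compare and the branch; the gathered
+// Uses/Defs then cover both the cmp and the branch.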
+
+/// HoistCommonCodeInSuccs - If the successors of MBB have a common instruction
+/// sequence at their start, move those instructions before MBB's terminator
+/// if it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor;
+ // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobbered by the
+ // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ // Remove kills from LocalDefsSet; these registers had short live ranges.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !LocalDefsSet.count(Reg))
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ }
+
+ // Track local defs so we can update liveins.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LocalDefs.push_back(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.insert(*AI);
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
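+
+// For illustration of HoistCommonCodeInSuccs (hypothetical code): if both
+// successors begin with
+// r2 = add r0, r1
+// and MBB is their only predecessor, the add is moved into MBB ahead of its
+// conditional branch, erased from the false block, and r2 is added to both
+// successors' live-ins.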
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 0000000..df795df
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,123 @@
+//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ bool EnableHoistCommonCode;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 0000000..dee339a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,202 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
+ }
+
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
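+
+// For illustration of copyHint (hypothetical registers): for
+// "%vreg1 = COPY %R2" the hint for %vreg1 is %R2; for
+// "%vreg1:sub_lo = COPY %R2" it is the super-register of %R2 whose sub_lo
+// sub-register is %R2, if one exists in %vreg1's register class.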
+
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+ return false;
+ }
+ return true;
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hint and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+ }
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = Hint[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && mri.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
+ }
+ }
+
+ Hint.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength(LIS.getSlotIndexes())) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ totalWeight *= 0.5F;
+
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+}
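+
+// For illustration (assuming getSpillWeight of the form
+// (isDef + isUse) * 10^loopDepth in this version): a read-modify-write of
+// li.reg at loop depth 2 contributes 2 * 100 = 200; if it also looks like an
+// induction-variable update on a loop exit, that becomes 600. The total is
+// then normalized by the interval's size.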
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 0000000..f1d4ace
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,180 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
+ // No stack is used.
+ StackOffset = 0;
+
+ clearFirstByValReg();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate space on the stack large enough to pass an argument
+// by value. The size and alignment information of the argument is encoded in
+// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ MF.getFrameInfo()->ensureMaxAlignment(Align);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
+ unsigned Offset = AllocateStack(Size, Align);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
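+
+// For illustration of HandleByVal (hypothetical values): a byval argument of
+// size 12 and align 8, with no larger target minimums, reserves a 12-byte
+// stack slot aligned to 8 bytes and records a memory CCValAssign at the
+// allocated offset.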
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
+}
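+
+// For illustration: register number 37 sets bit 37 & 31 == 5 in word
+// 37 / 32 == 1 of UsedRegs, and likewise for each of its aliases.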
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 0000000..35ec68d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,77 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBasicTTIPass(Registry);
+ initializeBranchFolderPassPass(Registry);
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSchedulerPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackColoringPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 0000000..0eb74a4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,658 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.reset();
+
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prologue.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+}
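+
+// For illustration: after StartBlock, a live register R has
+// KillIndices[R] == BBSize and DefIndices[R] == ~0u, while a dead one has
+// KillIndices[R] == ~0u and DefIndices[R] == BBSize. Exactly one of the two
+// is ~0u, an invariant the asserts below rely on.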
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.reset();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ if (MI->isDebugValue())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
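+
+// For illustration of CriticalPathStep: among predecessor edges whose
+// depth-plus-latency values are {5, 7, 7}, one of the 7s is chosen, and if
+// exactly one of those is an anti-dependence edge it is preferred, since
+// anti-dependences are the edges this pass can break.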
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instruction is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *AI;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = 0;
+ RegRefs.erase(i);
+ }
+
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.reset(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.reset(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It wasn't previously live but now it is, this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the anti-dependent register and
+// must be replaced by NewReg. Return true if any of their parent instructions
+// may clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid)
+{
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instruction that defines AntiDepReg also defines NewReg, it's
+ // not suitable. For example, instructions with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(),
+ ite = Forbid.end(); it != ite; ++it)
+ if (TRI->regsOverlap(NewReg, *it)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first free register
+ // that isn't A. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!MRI.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.test(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ SmallVector<unsigned, 2> ForbidRegs;
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
+ // If this instruction's defs have a special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC, ForbidRegs)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
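A note for readers tracing the bookkeeping in ScanInstruction and BreakAntiDependencies above: the pass maintains the invariant that, for any register, exactly one of KillIndices[Reg] and DefIndices[Reg] equals ~0u during the bottom-up scan. The standalone C++ sketch below is a toy model of just that invariant (illustrative names, not LLVM code):

    #include <cassert>
    #include <vector>

    // Toy model of the bottom-up liveness bookkeeping. Registers start out
    // dead: KillIndices[R] == ~0u, DefIndices[R] == some valid index.
    struct LivenessModel {
      std::vector<unsigned> KillIndices, DefIndices;
      explicit LivenessModel(unsigned NumRegs)
          : KillIndices(NumRegs, ~0u), DefIndices(NumRegs, 0) {}

      // Exactly one of the two maps holds ~0u for any register, mirroring
      // the asserts in the pass.
      void checkInvariant(unsigned Reg) const {
        assert((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u));
      }

      // A use seen while scanning bottom-up: if the register was dead,
      // this use is its kill and the register becomes live.
      void observeUse(unsigned Reg, unsigned Count) {
        if (KillIndices[Reg] == ~0u) {
          KillIndices[Reg] = Count;
          DefIndices[Reg] = ~0u;
        }
        checkInvariant(Reg);
      }

      // A def seen while scanning bottom-up ends the live range: the
      // register is dead below this point.
      void observeDef(unsigned Reg, unsigned Count) {
        DefIndices[Reg] = Count;
        KillIndices[Reg] = ~0u;
        checkInvariant(Reg);
      }
    };

    int main() {
      LivenessModel M(4);
      M.observeUse(1, 9); // scanning up, a use at index 9 kills reg 1
      M.observeDef(1, 5); // the def above it at index 5 closes the range
      return 0;
    }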
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 0000000..df13dd3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,110 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// performs register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ std::vector<const TargetRegisterClass*> Classes;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ BitVector KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ ~CriticalAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid);
+ };
+}
+
+#endif
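The tri-state encoding documented for Classes above (null, a real class pointer, or -1 cast to a pointer) is easy to misread; here is a minimal standalone sketch of the same convention with toy types (RegClass stands in for TargetRegisterClass; not LLVM code):

    #include <cstdio>

    struct RegClass { const char *Name; };

    // Sentinel meaning "live, but used in multiple register classes".
    static const RegClass *const MultipleClasses =
        reinterpret_cast<const RegClass *>(-1);

    static void describe(const RegClass *C) {
      if (!C)
        std::printf("not live\n");
      else if (C == MultipleClasses)
        std::printf("live, used in multiple classes\n");
      else
        std::printf("live, single class %s\n", C->Name);
    }

    int main() {
      static const RegClass GPR = {"GPR"};
      describe(nullptr);          // register not live
      describe(&GPR);             // live, consistently one class
      describe(MultipleClasses);  // live, mixed classes: not renamable
      return 0;
    }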
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 0000000..840a101
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,225 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// canReserveResources - Check if the resources occupied by an MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by an MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// Schedule method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT, bool IsPostRA);
+ // Schedule - Actual scheduling work.
+ void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) :
+ ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+ CanHandleTerminators = true;
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+ TII = TM.getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr *MIFirst = CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst, MI);
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr *MI = BeginItr;
+
+ this->initPacketizerState();
+
+ // End the current packet if needed.
+ if (this->isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask the DFA if machine resources are available for MI.
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ if (ResourceAvail) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
+ MachineInstr *MJ = *VI;
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ // Is it legal to packetize SUI and SUJ together?
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ // Allow packetization if dependency can be pruned.
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ endPacket(MBB, MI);
+ break;
+ } // !isLegalToPruneDependencies.
+ } // !isLegalToPacketizeTogether.
+ } // For all instructions in CurrentPacketMIs.
+ } else {
+ // End the packet if resource is not available.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ BeginItr = this->addToPacket(MI);
+ } // For all instructions in BB.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
+}
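To make the <state, input> table encoding documented above ReadTable concrete, the standalone C++ toy below builds a hypothetical three-state DFA with made-up tables and performs the same lookup (illustrative only; the real tables are generated from the target's Schedule.td):

    #include <cassert>

    // Pairs of <input class, next state>, grouped by source state.
    static const int StateInputTable[][2] = {
        {0, 1}, {1, 2}, // transitions out of state 0
        {1, 2},         // transitions out of state 1
                        // state 2 accepts nothing: the packet is full
    };
    // Entry i is the index of state i's first pair; the extra trailing
    // entry marks the end of the last state's range.
    static const unsigned StateEntryTable[] = {0, 2, 3, 3};

    // Advance State on Input if a legal transition exists.
    static bool tryTransition(unsigned &State, int Input) {
      for (unsigned i = StateEntryTable[State], e = StateEntryTable[State + 1];
           i != e; ++i)
        if (StateInputTable[i][0] == Input) {
          State = StateInputTable[i][1];
          return true;
        }
      return false;
    }

    int main() {
      unsigned State = 0;
      assert(tryTransition(State, 0) && State == 1); // add an insn of class 0
      assert(tryTransition(State, 1) && State == 2); // then one of class 1
      assert(!tryTransition(State, 1)); // no room left: end the packet
      return 0;
    }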
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..a54217f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,190 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false)
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm without side effects and no defs can
+ // still be deleted. But there is so much bad inline asm code out there,
+ // we should let it be.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = MRI->getReservedRegs();
+
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ assert(Use.isDebug());
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+ AnyChanges = true;
+ MI->eraseFromParent();
+ ++NumDeletes;
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ LivePhysRegs.reset(*SR);
+ }
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LivePhysRegs.set(*AI);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
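The reverse iteration above is what lets whole chains of dead instructions vanish in a single sweep: deleting a dead definition removes the last uses of its operands, which can make their defining instructions dead in turn. A standalone toy model of that effect (made-up straight-line "program", not LLVM code):

    #include <cstdio>
    #include <vector>

    struct Instr {
      int Def;               // value defined, or -1
      std::vector<int> Uses; // values read
      bool SideEffects;
    };

    int main() {
      // v0 = ...; v1 = f(v0); v2 = g(v1); store v0  -- v2, then v1, are dead.
      std::vector<Instr> Prog = {
          {0, {}, false}, {1, {0}, false}, {2, {1}, false}, {-1, {0}, true}};

      std::vector<int> UseCount(3, 0);
      for (const Instr &I : Prog)
        for (int U : I.Uses)
          ++UseCount[U];

      std::vector<bool> Dead(Prog.size(), false);
      for (int i = (int)Prog.size() - 1; i >= 0; --i) {
        const Instr &I = Prog[i];
        if (I.SideEffects || I.Def < 0 || UseCount[I.Def] != 0)
          continue;
        Dead[i] = true;     // delete the dead def ...
        for (int U : I.Uses)
          --UseCount[U];    // ... freeing up its operands for the next step
      }
      for (unsigned i = 0; i != Prog.size(); ++i)
        std::printf("instr %u: %s\n", i, Dead[i] ? "dead" : "live");
      return 0;
    }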
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 0000000..f27ec77
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,183 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TLI;
+
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Value *GetExceptionObject(ResumeInst *RI);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+ RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
+}
+
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Value *ExnObj = 0;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = 0;
+ InsertValueInst *ExcIVI = 0;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = ExcIVI->getOperand(1);
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
+ }
+ }
+ }
+
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+
+ RI->eraseFromParent();
+
+ if (EraseIVIs) {
+ if (SelIVI->getNumUses() == 0)
+ SelIVI->eraseFromParent();
+ if (ExcIVI->getNumUses() == 0)
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->getNumUses() == 0)
+ SelLoad->eraseFromParent();
+ }
+
+ return ExnObj;
+}
+
+/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
+/// into calls to the appropriate _Unwind_Resume function.
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
+ bool UsesNewEH = false;
+ SmallVector<ResumeInst*, 16> Resumes;
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ Resumes.push_back(RI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(TI))
+ UsesNewEH = II->getUnwindDest()->isLandingPad();
+ }
+
+ if (Resumes.empty())
+ return UsesNewEH;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = Resumes[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ LLVMContext &Ctx = Fn.getContext();
+ unsigned ResumesSize = Resumes.size();
+
+ if (ResumesSize == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Value *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+ "exn.obj", UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ for (SmallVectorImpl<ResumeInst*>::iterator
+ I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
+ ResumeInst *RI = *I;
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Value *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
+ }
+
+ // Call the function.
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = InsertUnwindResumeCalls(Fn);
+ return Changed;
+}
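For readers unfamiliar with this rewrite, here is a hand-written sketch (illustrative only; the %bb1/%bb2/%exn1/%exn2 names are made up) of the shared block the pass creates when a function contains more than one resume. Each former resume block ends with a "br label %unwind_resume", and the phi collects the extracted exception objects:

    unwind_resume:                       ; preds = %bb1, %bb2
      %exn.obj = phi i8* [ %exn1, %bb1 ], [ %exn2, %bb2 ]
      call void @_Unwind_Resume(i8* %exn.obj)
      unreachable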
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 0000000..5447df0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,801 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+//   Triangle: Head              Diamond: Head
+//              | \                       /  \
+//              |  \                     /    |
+//              |  [TF]BB              FBB    TBB
+//              |  /                     \    /
+//              | /                       \  /
+//             Tail                       Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
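+//
+// For example (operand names are illustrative only), a phi in Tail such as
+//
+//   %dst = PHI %a, <BB#1>, %b, <BB#2>
+//
+// may become a target select on Head's branch condition:
+//
+//   %dst = SELECT %cond, %a, %b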
+//
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+ /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ if (MO->isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+
+ // Remember clobbered regunits.
+ if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI))
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful; it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+ // Some of the conditional code depends on I.
+ if (InsertAfter.count(I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO->isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO->readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, so this is not a
+ // valid insertion point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - Analyze the sub-CFG rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = 0;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = 0;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+ const char *getPassName() const { return "Early If-Conversion"; }
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+ // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
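+// Worked example (hypothetical values): adjCycles(3, -5) would wrap around in
+// unsigned arithmetic, so the result saturates to 0, while adjCycles(10, -4)
+// returns 6 and adjCycles(10, 4) returns 14.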
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
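+// Worked example of the cost model above (hypothetical numbers): with a
+// mispredict penalty of 10 cycles, CritLimit is 5. If a tail PHI has
+// MaxDepth = 12 and its TBB value would only be ready at cycle 19 after
+// speculation, the conversion stretches the critical path by 7 > 5 cycles
+// and shouldConvertIf() rejects it.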
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces();
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ SchedModel =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (po_iterator<MachineDominatorTree*>
+ I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
+ if (tryConvertIf(I->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 0000000..3bb0465
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */ true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
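+// Illustrative sketch (hypothetical CFG): with edges BB#0 -> BB#1 and
+// BB#0 -> BB#2, the loop above joins node 1 (BB#0 outgoing) with nodes 2
+// (BB#1 ingoing) and 4 (BB#2 ingoing), so both edges end up in one bundle:
+//   // With EdgeBundles &EB available from the pass manager:
+//   unsigned Out0 = EB.getBundle(0, true);  // outgoing bundle of BB#0
+//   unsigned In1 = EB.getBundle(1, false);  // ingoing bundle of BB#1
+//   assert(Out0 == In1 && "edges sharing an endpoint share a bundle");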
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const Twine &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 0000000..8a1e2d9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGC : public GCStrategy {
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+ public:
+ ErlangGC();
+ bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+ };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ CustomSafePoints = true;
+}
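+// NeededSafePoints is a bitmask over GC::PointKind, so a strategy that wanted
+// safe points both before and after calls could request (illustrative, not
+// what Erlang needs):
+//   NeededSafePoints = (1 << GC::PreCall) | (1 << GC::PostCall);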
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+
+ if (MI->getDesc().isCall()) {
+
+ // Do not treat tail call sites as safe points.
+ if (MI->getDesc().isTerminator())
+ continue;
+
+ // Code copied from VisitCallPoint(...).
+ MachineBasicBlock::iterator RAI = MI; ++RAI;
+ MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+ FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
new file mode 100644
index 0000000..9b0e76f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -0,0 +1,725 @@
+//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the execution dependency fix pass.
+//
+// Some X86 SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer CPUs there is extra latency
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "execution-fix"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Bitmask of available domains. For an open DomainValue, it is the set of
+ // domains still possible for collapsing. For a collapsed DomainValue it is
+ // the set of domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single available domain.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return CountTrailingZeros_32(AvailableDomains);
+ }
+
+ DomainValue() : Refs(0) { clear(); }
+
+ // Reset this DomainValue to the empty state, dropping all its data.
+ void clear() {
+ AvailableDomains = 0;
+ Next = 0;
+ Instrs.clear();
+ }
+};
+}
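+// Illustrative sketch (hypothetical domain numbering): with domain 0 as the
+// integer domain and domain 1 as the floating-point domain, an open value
+// with AvailableDomains == 0b11 can still collapse either way:
+//   DomainValue DV;
+//   DV.addDomain(0);  // available in the integer domain
+//   DV.addDomain(1);  // ... and in the floating-point domain
+//   assert(DV.getCommonDomains(0x2) == 0x2 && DV.getFirstDomain() == 0);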
+
+namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonymous namespace
+
+namespace {
+class ExeDepsFix : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ const TargetRegisterClass *const RC;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ std::vector<int> AliasMap;
+ const unsigned NumRegs;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
+ LiveOutMap LiveOuts;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
+
+public:
+ ExeDepsFix(const TargetRegisterClass *rc)
+ : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Execution dependency fix";
+ }
+
+private:
+ // Register mapping.
+ int regIndex(unsigned Reg);
+
+ // DomainValue allocation.
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
+
+ // LiveRegs manipulations.
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char ExeDepsFix::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int ExeDepsFix::regIndex(unsigned Reg) {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ return AliasMap[Reg];
+}
+
+DomainValue *ExeDepsFix::alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ if (domain >= 0)
+ dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+ return dv;
+}
+
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
+}
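+// Chain example (hypothetical): after merge(A, B) and then merge(C, A), a
+// stale reference to B is resolved by following B->Next == A and
+// A->Next == C; resolve() retains C, releases B, and rewrites the reference
+// to point directly at C.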
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+
+ if (LiveRegs[rx].Value == dv)
+ return;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void ExeDepsFix::kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = 0;
+}
+
+/// Force register rx into domain.
+void ExeDepsFix::force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to an arbitrary
+ // domain and force the new value into domain. This costs a domain crossing.
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ setLiveReg(rx, alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are redirected to A.
+ B->Next = retain(A);
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
+ return true;
+}
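+// Worked example (hypothetical masks): with A->AvailableDomains == 0b011 and
+// B->AvailableDomains == 0b110, the common mask is 0b010, so the merged value
+// can only collapse into domain 1. With disjoint masks, merge() returns false
+// and the caller pays a domain crossing instead.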
+
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = regIndex(*i);
+ if (rx < 0)
+ continue;
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx].Value->isCollapsed()) {
+ // We are already collapsed, but the predecessor is not. Force it.
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ merge(LiveRegs[rx].Value, pdv);
+ else
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
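+// The merge cases above, summarized (editor's illustration): (1) no local
+// value yet - adopt the predecessor's DomainValue; (2) local value already
+// collapsed - collapse a compatible open predecessor into the same domain;
+// (3) both open - merge() them; an open local value meeting a collapsed
+// predecessor is handled by force(), possibly paying a domain crossing.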
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = 0;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ int rx = regIndex(MO.getReg());
+ if (rx < 0)
+ continue;
+
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
+ }
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ }
+
+ ++CurInstr;
+}
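+// Clearance example (hypothetical): if the register was last written at
+// instruction 2 and the current partial write is instruction 7, Clearance is
+// 5. A target reporting a preferred clearance of 16 gets an explicit
+// dependency-breaking instruction inserted via breakPartialRegDependency();
+// a preference of 4 leaves the partial write alone.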
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ kill(rx);
+ force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx].Value) {
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->setExecutionDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<LiveReg, 4> Regs;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ const LiveReg &LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
+ }
+ }
+ if (!Inserted)
+ Regs.push_back(LR);
+ }
+
+ // Regs is now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operands, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+}
+
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ LiveRegs = 0;
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
+ // or -1.
+ AliasMap.resize(TRI->getNumRegs(), -1);
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
+ AI.isValid(); ++AI)
+ AliasMap[*AI] = i;
+ }
+
+ MachineBasicBlock *Entry = MF->begin();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ visitInstr(I);
+ leaveBasicBlock(MBB);
+ }
+
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
+ leaveBasicBlock(MBB);
+ }
+
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
+ LiveOuts.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *
+llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
+ return new ExeDepsFix(RC);
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 0000000..b2b6882
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand pseudo-instructions produced by ISel. These are usually used to
+// allow the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand ISel Pseudo-instructions", false, false)
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ if (MI->usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
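+// Editor's note (hypothetical target): a pseudo such as a select on a target
+// without conditional moves is expanded by EmitInstrWithCustomInserter() into
+// a diamond of new basic blocks, which is why the block and both iterators
+// are re-anchored after every expansion above.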
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
new file mode 100644
index 0000000..1611db8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -0,0 +1,225 @@
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postrapseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferImplicitDefs(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
+
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+ if (MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "dead copy: " << *MI);
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
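+// Illustrative expansion (x86 assumed for the example): a post-RA pseudo like
+//   %ECX<def> = COPY %EAX<kill>
+// is lowered by copyPhysReg() into a real move such as MOV32rr, after which
+// the COPY pseudo itself is erased.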
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr *MI = mi;
+ // Advance iterator here because MI may be erased.
+ ++mi;
+
+ // Only expand pseudos.
+ if (!MI->isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI->getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
+ MadeChange |= LowerSubregToReg(MI);
+ break;
+ case TargetOpcode::COPY:
+ MadeChange |= LowerCopy(MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..ef5247c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,178 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+ llvm_unreachable("Invalid point kind");
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI);;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+bool Printer::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..f80e9ce
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..1173d11
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,430 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
+// Roots are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class GCMachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ CustomSafePoints(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable("must override performCustomLowering");
+}
+
+
+bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
+ dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
+ llvm_unreachable(0);
+}
+
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
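+/// InsertRootInitializers - Store a null pointer into each gcroot alloca that
+/// is not explicitly initialized before the first possible safe point. E.g.
+/// (illustrative IR): after "%root = alloca i8*", this inserts
+/// "store i8* null, i8** %root" immediately following the alloca.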
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
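+ //
+ // For example (illustrative): on a 32-bit target, "%q = udiv i64 %a, %b"
+ // may be lowered to a call to a runtime routine such as __udivdi3, and any
+ // call is a potential safe point.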
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
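+///
+/// For illustration (sketch, not from this file), with no custom barriers:
+///   %v = call i8* @llvm.gcread(i8* %obj, i8** %fld)      ; becomes a load
+///   call void @llvm.gcwrite(i8* %v, i8* %obj, i8** %fld) ; becomes a store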
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis()
+ : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
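+ //
+ // Illustrative machine-code result (label names hypothetical):
+ //   GC_LABEL <Ltmp0>   ; GC::PreCall safe point
+ //   CALL @foo
+ //   GC_LABEL <Ltmp1>   ; GC::PostCall safe point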
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->isCall())
+ VisitCallPoint(MI);
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetFrameLowering not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..9958d7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1583 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/CodeGen/Passes.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// Hidden options to help with debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
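+ // (See the ASCII diagrams in AnalyzeBlock below for the simple, triangle,
+ // and diamond shapes these kinds correspond to.)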
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the results
+ /// of if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the block's
+ /// classification, the common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false (i.e. the branch was analyzable).
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Extra cost of instructions that are slower when predicated.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLoweringBase *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineRegisterInfo *MRI;
+
+ bool PreRegAlloc;
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs = 0);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
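+ // Note: AnalyzeBlocks sorts with this comparator and runOnMachineFunction
+ // pops candidates off the back of the vector, so candidates that sort
+ // later here (e.g. diamonds, whose duplication counts are negated) are
+ // attempted first.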
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+char &llvm::IfConverterID = IfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected!");
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to
+/// duplicate if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch branches to the 'false' block rather than the other way around.
+/// It also returns, in 'Dups', the number of instructions that the ifcvt
+/// would need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (I->isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already-predicated instructions and end-of-block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, ends with a fallthrough or an unconditional branch, or is
+ // no longer considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
+ bool Enqueued = false;
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or
+/// if all the intervening blocks are empty (given that BB can fall through
+/// to its next block).
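+///
+/// For example (illustrative): BB -> (empty) -> (empty) -> ToBB yields true,
+/// provided each intervening empty block is a successor of the one before it.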
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid the case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
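+///
+/// For example (illustrative, hypothetical registers): when "add r0, r1, #1"
+/// is predicated into "addne r0, r1, #1", r0 keeps its old value whenever the
+/// predicate is false, so the instruction is given an implicit use of r0
+/// (RegState::Implicit | RegState::Undef) to make that dependence visible.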
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.erase(*SubRegs);
+ }
+ }
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!Redefs.insert(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
+ } else {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Remove branch from 'true' block and remove duplicated instructions.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+
+ // Remove 'false' block branch and find the last instruction to predicate.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+ // These are defined before control flow reaches the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ ExtUses.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ExtUses.insert(*SubRegs);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ RedefsByFalse.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RedefsByFalse.insert(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+ // Predicate the 'false' block.
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB);
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+static bool MaySpeculate(const MachineInstr *MI,
+ SmallSet<unsigned, 4> &LaterRedefs,
+ const TargetInstrInfo *TII) {
+ bool SawStore = true;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != 0;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (I->isDebugValue() || TII->isPredicated(I))
+ continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs, TRI, true);
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && I->isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 0000000..c6d1a18
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1295 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "Spiller.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of spilled snippets");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists, "Number of hoisted spills");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+public:
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // True when value is defined by an original PHI not from splitting.
+ bool DefByOrigPHI;
+
+ // True when the COPY defining this value killed its source.
+ bool KillsSource;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // The block where SpillVNI should be spilled. Currently, this must be the
+ // block containing SpillVNI->def.
+ MachineBasicBlock *SpillMBB;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ // List of values that depend on this one. These values are actually the
+ // same, but live range splitting has placed them in different registers,
+ // or SSA update has inserted PHI-defs to preserve SSA form. These are
+ // copies of the current value and phi-kills. Usually only phi-kills cause
+ // more than one dependent value.
+ TinyPtrVector<VNInfo*> Deps;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {}
+
+ // Returns true when a def has been found.
+ bool hasDef() const { return DefByOrigPHI || DefMI; }
+ };
+
+private:
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : MF(mf),
+ LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()),
+ VRM(vrm),
+ MFI(*mf.getFrameInfo()),
+ MRI(mf.getRegInfo()),
+ TII(*mf.getTarget().getInstrInfo()),
+ TRI(*mf.getTarget().getRegisterInfo()) {}
+
+ void spill(LiveRangeEdit &);
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
+ MachineInstr *LoadMI = 0);
+ void insertReload(LiveInterval &NewLI, SlotIndex,
+ MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
+}
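+
+// Typical use (a sketch, not tied to any particular allocator): a register
+// allocator creates one spiller per function and calls spill() once for each
+// live range it decides to spill; the LiveRangeEdit is set up by the caller.
+//
+//   Spiller *SpillerInstance = createInlineSpiller(Pass, MF, VRM);
+//   ...
+//   LiveRangeEdit LRE = ...; // describes the interval being spilled
+//   SpillerInstance->spill(LRE);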
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots, which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+ if (!MI->isFullCopy())
+ return 0;
+ if (MI->getOperand(0).getReg() == Reg)
+ return MI->getOperand(1).getReg();
+ if (MI->getOperand(1).getReg() == Reg)
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = 0;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ RI = MRI.reg_nodbg_begin(SnipLI.reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && MI != UseMI)
+ return false;
+ UseMI = MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
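+//
+// Schematically, a sibling copy chain looks like this (hypothetical
+// registers, all sharing the same original according to VRM.getOriginal()):
+//
+//   %A = ...          ; real def in the original live range
+//   %B = COPY %A      ; sibling copy created by splitting
+//   %C = COPY %B      ; tracing %C's value leads back to the def of %A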
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const InlineSpiller::SibValueInfo &SVI) {
+ OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+ if (SVI.SpillMBB)
+ OS << " in BB#" << SVI.SpillMBB->getNumber();
+ if (SVI.AllDefsAreReloads)
+ OS << " all-reloads";
+ if (SVI.DefByOrigPHI)
+ OS << " orig-phi";
+ if (SVI.KillsSource)
+ OS << " kill";
+ OS << " deps[";
+ for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
+ OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ OS << " ]";
+ if (SVI.DefMI)
+ OS << " def: " << *SVI.DefMI;
+ else
+ OS << '\n';
+ return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known. Otherwise remember the dependency for later.
+///
+/// @param SVI SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI,
+ VNInfo *VNI) {
+ // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+ TinyPtrVector<VNInfo*> FirstDeps;
+ if (VNI) {
+ FirstDeps.push_back(VNI);
+ SVI->second.Deps.push_back(VNI);
+ }
+
+ // Has the value been completely determined yet? If not, defer propagation.
+ if (!SVI->second.hasDef())
+ return;
+
+ // Work list of values to propagate. It would be nice to use a SetVector
+ // here, but then we would be forced to use a SmallSet.
+ SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI);
+ SmallPtrSet<VNInfo*, 8> WorkSet;
+
+ do {
+ SVI = WorkList.pop_back_val();
+ WorkSet.erase(SVI->first);
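+ // On the first pass through the loop, propagate only to the newly added
+ // dependent (FirstDeps); after that, propagate to all recorded dependents.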
+ TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+ VNI = 0;
+
+ SibValueInfo &SV = SVI->second;
+ if (!SV.SpillMBB)
+ SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+ DEBUG(dbgs() << " prop to " << Deps->size() << ": "
+ << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+ assert(SV.hasDef() && "Propagating undefined value");
+
+ // Should this value be propagated as a preferred spill candidate? We don't
+ // propagate values of registers that are about to spill.
+ bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+ unsigned SpillDepth = ~0u;
+
+ for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
+ DepE = Deps->end(); DepI != DepE; ++DepI) {
+ SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+ SibValueInfo &DepSV = DepSVI->second;
+ if (!DepSV.SpillMBB)
+ DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+ bool Changed = false;
+
+ // Propagate defining instruction.
+ if (!DepSV.hasDef()) {
+ Changed = true;
+ DepSV.DefMI = SV.DefMI;
+ DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+ }
+
+ // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
+ // all predecessors.
+ if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+ Changed = true;
+ DepSV.AllDefsAreReloads = false;
+ }
+
+ // Propagate best spill value.
+ if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+ if (SV.SpillMBB == DepSV.SpillMBB) {
+ // DepSV is in the same block. Hoist when dominated.
+ if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ // This hoist only helps when the DepSV copy kills its source.
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ } else {
+ // DepSV is in a different block.
+ if (SpillDepth == ~0u)
+ SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+ // Also hoist spills to blocks with smaller loop depth, but make sure
+ // that the new value dominates. Non-phi dependents are always
+ // dominated, phis need checking.
+ if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+ (!DepSVI->first->isPHIDef() ||
+ MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ }
+ }
+
+ if (!Changed)
+ continue;
+
+ // Something changed in DepSVI. Propagate to dependents.
+ if (WorkSet.insert(DepSVI->first))
+ WorkList.push_back(DepSVI);
+
+ DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
+ << DepSVI->first->def << " to:\t" << DepSV);
+ }
+ } while (!WorkList.empty());
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ // Check if a cached value already exists.
+ SibValueMap::iterator SVI;
+ bool Inserted;
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+ if (!Inserted) {
+ DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+ return SVI->second.DefMI;
+ }
+
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+
+ // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+ // processed.
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ tie(Reg, VNI) = WorkList.pop_back_val();
+ DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << ":\t");
+
+ // First check if this value has already been computed.
+ SVI = SibValues.find(VNI);
+ assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ // Stop at original PHIs. We don't know the value at the predecessors.
+ if (VNI->def == OrigVNI->def) {
+ DEBUG(dbgs() << "orig phi value\n");
+ SVI->second.DefByOrigPHI = true;
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ continue;
+ }
+
+ // This is a PHI inserted by live range splitting. We could trace the
+ // live-out value from predecessor blocks, but that search can be very
+ // expensive if there are many predecessors and many more PHIs as
+ // generated by tail-dup when it sees an indirectbr. Instead, look at
+ // all the non-PHI defs that have the same value as OrigVNI. They must
+ // jointly dominate VNI->def. This is not optimal since VNI may actually
+ // be jointly dominated by a smaller subset of defs, so there is a chance
+ // we will miss an AllDefsAreReloads optimization.
+
+ // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+
+ for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI2 = *VI;
+ if (VNI2->isUnused())
+ continue;
+ if (!OrigLI.containsOneValue() &&
+ OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+ continue;
+ if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+ PHIs.push_back(VNI2);
+ else
+ NonPHIs.push_back(VNI2);
+ }
+ DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+ << " phi-defs, and " << NonPHIs.size()
+ << " non-phi/orig defs\n");
+
+ // Create entries for all the PHIs. Don't add them to the worklist, we
+ // are processing all of them in one go here.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+
+ // Add every PHI as a dependent of all the non-PHIs.
+ for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
+ VNInfo *NonPHI = NonPHIs[i];
+ // Known value? Try an insertion.
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+ // Add all the PHIs as dependents of NonPHI.
+ for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
+ SVI->second.Deps.push_back(PHIs[pi]);
+ // This is the first time we see NonPHI, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(Reg, NonPHI));
+ else
+ // Propagate to all inserted PHIs, not just VNI.
+ propagateSiblingValue(SVI);
+ }
+
+ // Next work list item.
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ LiveRangeQuery SrcQ(SrcLI, VNI->def);
+ assert(SrcQ.valueIn() && "Copy from non-existing value");
+ // Check if this COPY kills its source.
+ SVI->second.KillsSource = SrcQ.isKill();
+ VNInfo *SrcVNI = SrcQ.valueIn();
+ DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def
+ << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+ // Known sibling source value? Try an insertion.
+ tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
+ SibValueInfo(SrcReg, SrcVNI)));
+ // This is the first time we see Src, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ propagateSiblingValue(SVI, VNI);
+ // Next work list item.
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ SVI->second.DefMI = MI;
+ SVI->second.SpillMBB = MI->getParent();
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "reload\n");
+ propagateSiblingValue(SVI);
+ // Next work list item.
+ continue;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << "def " << *MI);
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ } while (!WorkList.empty());
+
+ // Look up the value we were looking for. We already did this lookup at the
+ // top of the function, but SibValues may have been invalidated.
+ SVI = SibValues.find(UseVNI);
+ assert(SVI != SibValues.end() && "Didn't compute requested info");
+ DEBUG(dbgs() << " traced to:\t" << SVI->second);
+ return SVI->second.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = 0;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
+
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ DEBUG(dbgs() << "\tno spill needed: " << SVI);
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SVI.SpillVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+
+ ++NumSpills;
+ ++NumHoists;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = UI.skipInstruction();) {
+ if (!MI->isCopy() && !MI->mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(MI);
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI))
+ continue;
+
+ if (VNI->isPHIDef()) {
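+ // A PHI-def receives its value from every predecessor; the live-out
+ // values there must be marked used as well.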
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
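+ // The register has no live value at this use, so no reload is needed;
+ // just flag the operands as reading undef.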
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(MI))
+ return false;
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Allocate a new register for the remat.
+ LiveInterval &NewLI = Edit->createFrom(Original);
+ NewLI.markNotSpillable();
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands with the rematerialized register.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipBundle();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+
+ // Get rid of deleted and empty intervals.
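+ // Iterate backwards so that erasing an entry doesn't invalidate the
+ // remaining indices.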
+ for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+ unsigned Reg = RegsToSpill[i-1];
+ if (!LIS.hasInterval(Reg)) {
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (!LI.empty())
+ continue;
+ Edit->eraseVirtReg(Reg);
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+ int FI = 0;
+ unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(MI, FI);
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
+ return true;
+}
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
+ bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i].second;
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
+ continue;
+ }
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
+ if (!FoldMI)
+ return false;
+ LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ MI->eraseFromParent();
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
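+ // Operand 0 of a COPY is the def, so folding it produces a store (spill);
+ // folding a use operand produces a load (reload).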
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front().second == 0)
+ ++NumSpills;
+ else
+ ++NumReloads;
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ // Some (out-of-tree) targets have EC reload instructions.
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
+ if (MO->isEarlyClobber())
+ LoadIdx = LoadIdx.getRegSlot(true);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+ ++NumReloads;
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex Idx, MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+ ++NumSpills;
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RegI.skipBundle();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (RI.Writes) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ LiveInterval &NewLI = Edit->createFrom(Reg);
+ NewLI.markNotSpillable();
+
+ if (RI.Reads)
+ insertReload(NewLI, Idx, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ MO.setReg(NewLI.reg);
+ if (MO.isUse()) {
+ if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (RI.Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
+ }
+ }
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ spillAroundUses(RegsToSpill[i]);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
+ MachineInstr *MI = RI.skipInstruction();) {
+ assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ Edit->eraseVirtReg(RegsToSpill[i]);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = 0;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << MRI.getRegClass(edit.getReg())->getName()
+ << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent()
+ << "\nFrom original " << LIS.getInterval(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, Loops);
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 0000000..a8e711e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,231 @@
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+// Static member used for null interference cursors.
+InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ PhysRegEntries.assign(TRI->getNumRegs(), 0);
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes, lis);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate(LIUArray, TRI);
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ unsigned i = 0;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
+ RegUnits[i].VirtTag = LIUArray[*Units].getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ RegUnits.clear();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ RegUnits.push_back(LIUArray[*Units]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ }
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
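+ // The entry is valid only if it tracks exactly PhysReg's reg units and
+ // none of the corresponding unions changed since the tags were recorded.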
+ unsigned i = 0, e = RegUnits.size();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ if (i == e)
+ return false;
+ if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
+ return false;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
+ for (;;) {
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid() || I.start() >= Stop)
+ continue;
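+ // Advance to the end of the block. If this overshoots the last segment
+ // starting in the block, back up one so its stop point can be read.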
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveInterval *LI = RegUnits[i].Fixed;
+ if (I == LI->end() || I->start >= Stop)
+ continue;
+ I = LI->advanceTo(I, Stop);
+ bool Backup = I == LI->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 0000000..c02fb9a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,228 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+
+namespace llvm {
+
+class LiveIntervals;
+
+class InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveInterval *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum].
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up-to-date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use
+ // too much memory. Instead, a fixed number of cache entries are used in a
+ // round-robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
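+ ///
+ /// A typical query binds the cursor to a candidate register and then visits
+ /// the blocks of interest, for example:
+ ///
+ ///   Cursor Cur;
+ ///   Cur.setPhysReg(Cache, PhysReg);
+ ///   Cur.moveToBlock(MBBNum);
+ ///   if (Cur.hasInterference())
+ ///     ... use Cur.first() and Cur.last() ...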
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+ static BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..07f0ccf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,565 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. It handles hard cases such as when
+/// there was already a prototype for the external function and that prototype
+/// doesn't match the arguments we expect to pass in.
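+///
+/// For example, ReplaceCallWith("memcpy", CI, Ops, Ops+3,
+/// CI->getArgOperand(0)->getType()) emits a call to an external memcpy and
+/// redirects all uses of CI to the new call.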
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// Visual Studio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
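+/// For example, the 16-bit expansion computes (V << 8) | (V >> 8) with a
+/// logical right shift; the wider expansions additionally mask the interior
+/// bytes before merging them.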
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
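+/// The expansion is the classic parallel bit count: adjacent groups of 1, 2,
+/// 4, ... bits are summed in parallel using the MaskValues table below, e.g.
+/// the first step computes (V & 0x5555...) + ((V >> 1) & 0x5555...).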
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
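+/// The expansion first smears the highest set bit into every lower position
+/// (V |= V >> 1, V |= V >> 2, ...), so after inverting V the leading zeros of
+/// the input are exactly the set bits of the result, which ctpop then counts.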
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with exp.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
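+ // e.g. for X = 8 (0b1000): ~X & (X-1) = 0b0111, and ctpop(0b0111) = 3.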
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp
new file mode 100644
index 0000000..96a5389
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+using namespace llvm;
+
+void JITCodeEmitter::anchor() { }
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..1a09837
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,296 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+// Enable or disable FastISel. Both options are needed, because FastISel is
+// enabled by default with -fast, and we wish to be able to enable or disable
+// fast-isel independently of -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid verbose asm state");
+}
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
+ AsmInfo = T.createMCAsmInfo(Triple);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(AsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h "
+ "and that InitializeAllTargetMCs() is being invoked!");
+}
+
+void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Targets may override createPassConfig to provide a target-specific subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartAfter, StopAfter);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ PassConfig->addCodeGenPrepare();
+
+ PassConfig->addPassesToHandleExceptions();
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Add common CodeGen passes.
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+ StartAfter, StopAfter);
+ if (!Context)
+ return true;
+
+ if (StopAfter) {
+ // FIXME: The intent is that this should eventually write out a YAML file,
+ // containing the LLVM IR, the machine-level IR (when stopping after a
+ // machine-level pass), and whatever other information is needed to
+ // deserialize the code and resume compilation. For now, just write the
+ // LLVM IR.
+ PM.add(createPrintModulePass(&Out));
+ return false;
+ }
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+ *Context);
+ MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
+ InstPrinter,
+ MCE, MAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Context);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
+ *Context, *MAB, Out,
+ MCE, hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->setAutoInitSections(true);
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing, not for
+ // real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ if (!Context)
+ return true;
+
+ addCodeEmitter(PM, JCE);
+
+ return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code emission is not supported. It fills in the MCContext pointer Ctx,
+/// which can be used to build a custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ raw_ostream &Out,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ if (!Ctx)
+ return true;
+
+ if (hasMCSaveTempLabels())
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Ctx);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ OwningPtr<MCStreamer> AsmStreamer;
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
+ *MAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ return false; // success!
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 0000000..deab05a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,152 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return RHSNum < LHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// scheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: scheduling it
+/// will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return NULL;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 0000000..8172154
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,335 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lexicalscopes"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+LexicalScopes::~LexicalScopes() {
+ releaseMemory();
+}
+
+/// releaseMemory - release memory.
+void LexicalScopes::releaseMemory() {
+ MF = NULL;
+ CurrentFnLexicalScope = NULL;
+ DeleteContainerSeconds(LexicalScopeMap);
+ DeleteContainerSeconds(AbstractScopeMap);
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+}
+
+/// initialize - Scan the machine function and construct the lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ releaseMemory();
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// in the given machine function.
+void LexicalScopes::
+extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ // Scan each instruction and create scopes. First build a working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ DebugLoc PrevDL;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute to any instruction in the output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match the scope of the first
+ // instruction in the range, then close off the range and record it.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is the beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *IA = NULL;
+ DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+ if (!Scope) return NULL;
+
+ // The scope that we were created with could have an extra file, which isn't
+ // what we care about in this case.
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+
+ if (IA)
+ return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ return LexicalScopeMap.lookup(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
+
+ if (InlinedAt) {
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, InlinedAt);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile()) {
+ Scope = DILexicalBlockFile(Scope).getScope();
+ D = DIDescriptor(Scope);
+ }
+
+ LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+
+ LexicalScope *Parent = NULL;
+ if (D.isLexicalBlock())
+ Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
+ LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+ if (!Parent && DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(MF->getFunction()))
+ CurrentFnLexicalScope = WScope;
+
+ return WScope;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+ MDNode *InlinedAt) {
+ LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
+ if (InlinedScope)
+ return InlinedScope;
+
+ DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
+ InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
+ DIDescriptor(Scope), InlinedAt, false);
+ InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
+ LexicalScopeMap[InlinedAt] = InlinedScope;
+ return InlinedScope;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+ LexicalScope *AScope = AbstractScopeMap.lookup(N);
+ if (AScope)
+ return AScope;
+
+ LexicalScope *Parent = NULL;
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+ AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
+ AbstractScopeMap[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// constructScopeNest
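+///
+/// Assign DFSIn/DFSOut numbers to every scope with an iterative depth-first
+/// walk, so that the [DFSIn, DFSOut] interval of a scope encloses the
+/// intervals of all of its children.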
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert(Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<LexicalScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ LexicalScope *WS = WorkStack.back();
+ const SmallVector<LexicalScope *, 4> &Children = WS->getChildren();
+ bool visitedChildren = false;
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ LexicalScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::
+assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ LexicalScope *PrevLexicalScope = NULL;
+ for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const InsnRange &R = *RI;
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate given set using machine basic blocks which
+/// have machine instructions that belong to lexical scope identified by
+/// DebugLoc.
+void LexicalScopes::
+getMachineBasicBlocks(DebugLoc DL,
+ SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
+ MBBs.clear();
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ MBBs.insert(I);
+ return;
+ }
+
+ SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
+ for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
+ E = InsnRanges.end(); I != E; ++I) {
+ InsnRange &R = *I;
+ MBBs.insert(R.first->getParent());
+ }
+}
+
+/// dominates - Return true if DebugLoc's lexical scope dominates at least one
+/// machine instruction's lexical scope in a given machine basic block.
+bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ bool Result = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (IDL.isUnknown())
+ continue;
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
+ }
+ return Result;
+}
+
+void LexicalScope::anchor() { }
+
+/// dump - Print data structures.
+void LexicalScope::dump(unsigned Indent) const {
+#ifndef NDEBUG
+ raw_ostream &err = dbgs();
+ err.indent(Indent);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ err.indent(Indent);
+ N->dump();
+ if (AbstractScope)
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
+
+ if (!Children.empty())
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump(Indent + 2);
+#endif
+}
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 0000000..0b117ac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,995 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept: in
+// a virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+namespace {
+/// UserValueScopes - Keeps track of the lexical scopes associated with a
+/// user value's source location.
+class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+
+ /// dominates - Return true if current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
+ return true;
+ return false;
+ }
+};
+} // end anonymous namespace
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
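+///
+/// The classes are maintained union-find style: every UserValue carries a
+/// leader pointer, merge() splices two classes together, and getLeader()
+/// chases leader pointers to the representative.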
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+  /// getLocationNo - Return the location number that matches LocMO.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+      // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
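+
+  // Illustration (hypothetical state): with locations = { %vreg1, <fi#3> },
+  // a query for %vreg1 returns 0 whatever its use/def flags are; a query for
+  // %vreg2 appends it (as a use, with its parent cleared) and returns 2.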
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+  /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+  /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs
+  /// is live. Returns true if any changes were made.
+  bool splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction.
+  /// A variable may have more than one corresponding DBG_VALUE instruction;
+  /// only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+ DebugLoc findDebugLoc();
+
+ /// getDebugLoc - Return DebugLoc of this UserValue.
+  DebugLoc getDebugLoc() { return dl; }
+ void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ LexicalScopes LS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+  /// Whether emitDebugValues has been called.
+ bool EmitDone;
+ /// Whether the machine function is modified during the pass.
+ bool ModifiedMF;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+  /// @param Idx   Last valid SlotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
+ ModifiedMF(false) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+ DIVariable DV(variable);
+ OS << "!\"";
+ DV.printExtendedName(OS);
+ OS << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TM);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
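+
+// Example (hypothetical): if locations = { FI#1, %EAX, FI#1 } after LocNo 2
+// was rewritten, KeepLoc is 0 and index 2 is erased; mapped values equal to
+// 2 become 0, and values above 2 shift down by one.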
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
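+
+// For illustration (the operand values are hypothetical), a DBG_VALUE
+// accepted above has the shape
+//
+//   DBG_VALUE %vreg1, 0, !"x"
+//
+// i.e. location %vreg1, byte offset 0, and the metadata describing user
+// variable x.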
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ continue;
+ }
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Children[i]->getBlock();
+ if (UVS.dominates(MBB))
+ Todo.push_back(LIS.getMBBStartIdx(MBB));
+ }
+ } while (!Todo.empty());
+}
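+
+// Illustration (hypothetical CFG): a def of LocNo in BB0 is first extended to
+// the end of BB0; if it survives that far, the starts of BB0's dominator-tree
+// children are pushed on the worklist, so every dominated block inside the
+// variable's lexical scope receives a range until another def, the end of
+// VNI's live range, or the scope boundary stops the propagation.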
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(LI->reg),
+ UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+ // Copies of the full value.
+ if (UI.getOperand().getSubReg() || !UI->isCopy())
+ continue;
+ MachineInstr *MI = &*UI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
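+
+// Illustration (hypothetical): if the tracked value in %v1 is copied into %v2
+// just before %v1 dies, the kill point is covered by %v2's copy value, so a
+// def of a new %v2 location is inserted there and extended like any other.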
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ if (!Loc.isReg()) {
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ continue;
+ }
+
+ // Register locations are constrained to where the register value is live.
+ if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ LiveInterval *LI = 0;
+ const VNInfo *VNI = 0;
+ if (LIS.hasInterval(Loc.getReg())) {
+ LI = &LIS.getInterval(Loc.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ if (LI)
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ continue;
+ }
+
+ // For physregs, use the live range of the first regunit as a guide.
+ unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
+ LiveInterval *LI = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ // Don't track copies from physregs, it is too expensive.
+ extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ LS.initialize(mf);
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << mf.getName() << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ LS.releaseMemory();
+ ModifiedMF = Changed;
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), 0);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = NewRegs[i];
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+ // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+ ++NumInsertedDebugValues;
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+ EmitDone = true;
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 0000000..3ce3c39
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,70 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
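+// A typical client flow might look like this (sketch only; the surrounding
+// register-allocator code is assumed, not shown):
+//
+//   LiveDebugVariables *LDV = &getAnalysis<LiveDebugVariables>();
+//   ...
+//   LDV->splitRegister(OldReg, NewRegs); // Whenever a live range is split.
+//   ...
+//   LDV->emitDebugValues(VRM);           // Once assignments are final.
+//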
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..dccd847
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,951 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j' and no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = ranges.size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end)
+ Len = Mid;
+ else
+ I += Mid + 1, Len -= Mid + 1;
+ } while (Len);
+ return I;
+}
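+
+// Conceptually (a restatement for illustration): find(Pos) returns the first
+// range I with I->end > Pos, so Pos lies inside *I exactly when
+// I->start <= Pos; callers check that themselves.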
+
+VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+ iterator I = find(Def);
+ if (I == end()) {
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+ if (SlotIndex::isSameInstr(Def, I->start)) {
+ assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, I->start);
+ if (Def != I->start)
+ I->start = I->valno->def = Def;
+ return I->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty interval");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+bool LiveInterval::overlaps(const LiveInterval &Other,
+ const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty interval");
+ if (Other.empty())
+ return false;
+
+ // Use binary searches to find initial positions.
+ const_iterator I = find(Other.beginIndex());
+ const_iterator IE = end();
+ if (I == IE)
+ return false;
+ const_iterator J = Other.find(I->start);
+ const_iterator JE = Other.end();
+ if (J == JE)
+ return false;
+
+ for (;;) {
+ // J has just been advanced to satisfy:
+ assert(J->end >= I->start);
+ // Check for an overlap.
+ if (J->start < I->end) {
+ // I and J are overlapping. Find the later start.
+ SlotIndex Def = std::max(I->start, J->start);
+ // Allow the overlap if Def is a coalescable copy.
+ if (Def.isBlock() ||
+ !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+ return true;
+ }
+ // Advance the iterator that ends first to check for more overlaps.
+ if (J->end > I->end) {
+ std::swap(I, J);
+ std::swap(IE, JE);
+ }
+ // Advance J until J->end >= I->start.
+ do
+ if (++J == JE)
+ return false;
+ while (J->end < I->start);
+ }
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
+}
+
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as unused
+/// so it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->markUnused();
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ }
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = llvm::next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ I->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ SlotIndex Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+/// extendInBlock - If this interval is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+ if (empty())
+ return 0;
+ iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
+ if (I == begin())
+ return 0;
+ --I;
+ if (I->end <= StartIdx)
+ return 0;
+ if (I->end < Kill)
+ extendIntervalEndTo(I, Kill);
+ return I->valno;
+}
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ verify();
+
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
+ MustMapCurValNos = true;
+ break;
+ }
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos && !empty()) {
+ // Map the first live range.
+
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo != 0 && "Huh?");
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
+ } else {
+ // Didn't merge. Move OutIt to the next interval,
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+ }
+ }
+ // If we merge some live ranges, chop off the end.
+ ++OutIt;
+ ranges.erase(OutIt, end());
+ }
+
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+  // not supposed to be valid after calling join().
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ LiveRangeUpdater Updater(this);
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ Updater.add(*I);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of the overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ if (I->valno == RHSValNo)
+ Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 values numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+#endif
+
+void LiveInterval::print(raw_ostream &OS) const {
+ if (empty())
+ OS << "EMPTY";
+ else {
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phi";
+ }
+ }
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+#endif
+
+#ifndef NDEBUG
+void LiveInterval::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != 0);
+ assert(I->valno == valnos[I->valno->id]);
+ if (llvm::next(I) != E) {
+ assert(I->end <= llvm::next(I)->start);
+ if (I->end == llvm::next(I)->start)
+ assert(I->valno != llvm::next(I)->valno);
+ }
+ }
+}
+#endif
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
+
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LI.
+// 2. [ReadI; end) at the back of LI.
+// 3. Spills.
+//
+// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
+
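+// A minimal usage sketch (illustrative only; Start*/End*/ValNo are
+// hypothetical values produced by the caller):
+//
+//   LiveRangeUpdater Updater(&LI);
+//   Updater.add(Start1, End1, ValNo); // Segments added roughly in order.
+//   Updater.add(Start2, End2, ValNo);
+//   Updater.flush();                  // Restore LiveInterval invariants.
+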
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LI)
+ OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LI && "Can't have null LI in dirty updater.");
+ OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+ OS << ' ' << *I;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+ OS << ' ' << *I;
+ OS << '\n';
+}
+
+void LiveRangeUpdater::dump() const {
+ print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
+ assert(A.start <= B.start && "Unordered live ranges.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
+
+void LiveRangeUpdater::add(LiveRange Seg) {
+ assert(LI && "Cannot add to a null destination");
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LI->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveInterval::iterator E = LI->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LI->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LI or Spills.
+ if (WriteI == E) {
+ LI->ranges.push_back(Seg);
+ WriteI = ReadI = LI->ranges.end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the inserted segments.
+void LiveRangeUpdater::mergeSpills() {
+  // Perform a backwards merge of Spills and the area 1 segments in [B;WriteI).
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveInterval::iterator Src = WriteI;
+ LiveInterval::iterator Dst = Src + NumMoved;
+ LiveInterval::iterator SpillSrc = Spills.end();
+ LiveInterval::iterator B = LI->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LI && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LI->ranges.erase(WriteI, ReadI);
+ LI->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LI->begin();
+ LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange());
+ // This also invalidated ReadI, but it is recomputed below.
+ WriteI = LI->ranges.begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LI->ranges.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LI->verify();
+}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
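+
+// Example usage (sketch): splitting an interval whose values fall into
+// multiple connected components. LIV[0] is the original interval and
+// LIV[1..N-1] are freshly created empty intervals for the extra classes.
+//
+//   ConnectedVNInfoEqClasses ConEQ(LIS);
+//   unsigned NumComp = ConEQ.Classify(&LI);
+//   if (NumComp > 1) {
+//     // ...create NumComp-1 new empty intervals into LIV[1..], then:
+//     ConEQ.Distribute(LIV, MRI);
+//   }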
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
+ if (!VNI)
+ continue;
+ MO.setReg(LIV[getEqClass(VNI)]->reg);
+ }
+
+ // Move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && EqClass[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = EqClass[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && EqClass[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = EqClass[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..f1b8394
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,1172 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass, which is used
+// by the register allocators. The pass computes a live interval for
+// each virtual register and for each register unit with fixed
+// (physical register) liveness, based on the function's def and use
+// operands.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+using namespace llvm;
+
+char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ // LiveVariables isn't really required by this analysis; it is only required
+ // here to make sure it is live during TwoAddressInstructionPass and
+ // PHIElimination. This is temporary.
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+ DomTree(0), LRCalc(0) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+ delete LRCalc;
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ VirtRegIntervals.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
+
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ delete RegUnitIntervals[i];
+ RegUnitIntervals.clear();
+
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ if (!LRCalc)
+ LRCalc = new LiveRangeCalc();
+
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ computeVirtRegs();
+ computeRegMasks();
+ computeLiveInRegUnits();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+
+ // Dump the regunits.
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ if (LiveInterval *LI = RegUnitIntervals[i])
+ OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
+
+ // Dump the virtregs.
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+ }
+
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ MF->print(OS, Indexes);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+#endif
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
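+// For example: a function argument arriving in a physical register is
+// live-in to the entry block with no defining instruction, and the
+// exception pointer is live-in to a landing pad. Both appear below as
+// phi-defs on the corresponding register units.
+//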
+
+/// computeRegUnitInterval - Compute the live interval of a register unit, based
+/// on the uses and defs of aliasing registers. The interval should be empty,
+/// or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
+ unsigned Unit = LI->reg;
+
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->reg_empty(Root))
+ LRCalc->createDeadDefs(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ if (!MRI->reg_empty(*Supers))
+ LRCalc->createDeadDefs(LI, *Supers);
+ }
+ }
+
+ // Now extend LI to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
+ LRCalc->extendToUses(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ unsigned Reg = *Supers;
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LI, Reg);
+ }
+ }
+}
+
+
+/// computeLiveInRegUnits - Precompute the live ranges of any register units
+/// that are live-in to an ABI block somewhere. Register values can appear
+/// without a corresponding def when entering the entry block or a landing pad.
+///
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitIntervals.resize(TRI->getNumRegUnits());
+ DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the intervals allocated.
+ SmallVector<LiveInterval*, 8> NewIntvs;
+
+ // Check all basic blocks for live-ins.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock *MBB = MFI;
+
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
+ for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
+ LIE = MBB->livein_end(); LII != LIE; ++LII) {
+ for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = *Units;
+ LiveInterval *Intv = RegUnitIntervals[Unit];
+ if (!Intv) {
+ Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF);
+ NewIntvs.push_back(Intv);
+ }
+ VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the intervals.
+ for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i)
+ computeRegUnitInterval(NewIntvs[i]);
+}
+
+
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ LiveRangeQuery LRQ(*li, Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
+ }
+
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ (void)ExtVNI;
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ assert(li->getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ bool CanSeparate = false;
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->markUnused();
+ NewLI.removeRange(*LII);
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ CanSeparate = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, TRI);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
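+
+// Usage sketch (hypothetical caller): after deleting some uses of LI.reg,
+// shrink the interval and collect instructions that became fully dead:
+//
+//   SmallVector<MachineInstr*, 8> Dead;
+//   bool CanSplit = LIS->shrinkToUses(&LI, &Dead);
+//   // Erase the instructions in Dead; if CanSplit, the remaining values
+//   // may form separate components (see ConnectedVNInfoEqClasses).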
+
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveRangeQuery LRQ(*LI, Kill);
+ VNInfo *VNI = LRQ.valueOut();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBStart, MBBEnd;
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LI->removeRange(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+ // Keep track of regunit ranges.
+ SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = &getInterval(Reg);
+ if (LI->empty())
+ continue;
+
+ // Find the regunit intervals for the assigned register. They may overlap
+ // the virtual register live range, cancelling any kills.
+ RU.clear();
+ for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+ ++Units) {
+ LiveInterval *RUInt = &getRegUnit(*Units);
+ if (RUInt->empty())
+ continue;
+ RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+ }
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+
+ // Check if any of the regunits are live beyond the end of RI. That could
+ // happen when a physreg is defined as a copy of a virtreg:
+ //
+ // %EAX = COPY %vreg5
+ // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
+ // BAR %EAX<kill>
+ //
+ // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+ bool CancelKill = false;
+ for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+ LiveInterval *RInt = RU[u].first;
+ LiveInterval::iterator &I = RU[u].second;
+ if (I == RInt->end())
+ continue;
+ I = RInt->advanceTo(I, RI->end);
+ if (I == RInt->end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI.
+ CancelKill = true;
+ break;
+ }
+ if (CancelKill)
+ MI->clearRegisterKills(Reg, NULL);
+ else
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
+}
+
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // Note that powf() may be unavailable on some platforms, so we use
+ // pow(double, double) for consistency.
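+ // Approximate values of the multiplier (a sketch, rounded): depth 0 -> 1,
+ // 1 -> ~10, 2 -> ~87, 3 -> ~657, growing roughly like 10^d at first and
+ // flattening toward the 6.7e33 value at the depth-200 clamp.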
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
+ return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
+
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
+ return false;
+
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(TRI->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
+}
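+
+// Usage sketch (hypothetical caller): pre-filter candidate physical
+// registers for LI against all overlapping register masks:
+//
+//   BitVector Usable;
+//   if (LIS->checkRegMaskInterference(LI, Usable)) {
+//     // Some regmask overlaps LI; only PhysRegs with Usable.test(PhysReg)
+//     // set survive every overlapping mask.
+//   }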
+
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//
+
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
+ SlotIndex NewIdx;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
+ }
+
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
+ continue;
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
+
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
+ continue;
+ }
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
+ }
+
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
+ else
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
+ }
+
+ /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
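+ /// For example (case 5, hypothetical indexes): a use at 20 that kills a
+ /// value live in [10r,20r) is moved down to 40; the segment is extended to
+ /// [10r,40r) and kill flags at the old position are cleared.
+ ///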
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
+
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // overlapping ranges. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
+ return;
+ ++I;
+ }
+
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
+ }
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // values. The new range should be placed immediately before NewI, move any
+ // intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ }
+
+ /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
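+ /// For example (case 5, hypothetical indexes): when a kill at 40 of a
+ /// value live in [10r,40r) is hoisted to 20, the end is trimmed to 20 and
+ /// findLastUseBefore() then extends it to the last remaining use in
+ /// [20;40), if any.
+ ///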
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
+
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
+ }
+ }
+
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
+ return;
+ }
+
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
+ }
+
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ }
+
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+ "Cannot move regmask instruction below another call");
+ }
+
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg) {
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = NewIdx;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
+ }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(NewIdx < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(MII);
+
+ // Stop searching when NewIdx is reached.
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
+ return NewIdx;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
+ if (MO->isReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx;
+ }
+ // Didn't reach NewIdx. It must be the first instruction in the block.
+ return NewIdx;
+ }
+};
+
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI->getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
+}
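+
+// Usage sketch: an in-block scheduler keeps the analysis consistent by
+// splicing the instruction to its new position first, then notifying
+// LiveIntervals:
+//
+//   MBB->splice(InsertPos, MBB, MI);
+//   LIS->handleMove(MI);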
+
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
+}
+
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(End))
+ ++End;
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ endIdx = getInstructionIndex(End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ !hasInterval(MOI->getReg())) {
+ LiveInterval &LI = getOrCreateInterval(MOI->getReg());
+ computeVirtRegInterval(&LI);
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
+ unsigned Reg = OrigRegs[i];
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ LiveInterval::iterator LII = LI.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LI.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LI.begin())
+ prevStart = llvm::prior(LII)->start;
+
+ // FIXME: This could be more efficient if there was a removeRange
+ // method that returned an iterator.
+ LI.removeRange(*LII, true);
+ if (prevStart.isValid())
+ LII = LI.find(prevStart);
+ else
+ LII = LI.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI);
+ LII = LI.addRange(LR);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LI.addRange(LR);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 0000000..d5a81a3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,204 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Collect virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
+//
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
+
+ // In most cases, the union will start before VirtReg.
+ VirtRegI = VirtReg->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(VirtRegI->start);
+ }
+
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentReg = 0;
+ while (LiveUnionI.valid()) {
+ assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+
+ // Check for overlapping interference.
+ while (VirtRegI->start < LiveUnionI.stop() &&
+ VirtRegI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
+
+ // The iterators are now non-overlapping; LiveUnionI has been advanced
+ // beyond VirtRegI.
+ assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+
+ // Advance the iterator that ends first.
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
+ if (VirtRegI == VirtRegEnd)
+ break;
+
+ // Detect overlap, handle above.
+ if (VirtRegI->start < LiveUnionI.stop())
+ continue;
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
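+
+// Usage sketch (assuming Q is a Query bound to a virtual register and this
+// union): asking for at most one interference is a cheap emptiness test.
+//
+//   if (Q.collectInterferingVRegs(1) == 0) {
+//     // No assigned register overlaps; this physreg is free for VirtReg.
+//   }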
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 0000000..dede490
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,380 @@
+//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRangeCalc::reset(const MachineFunction *mf,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MF = mf;
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+
+ unsigned N = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(N);
+ LiveOut.resize(N);
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LI->createDeadDef() will deduplicate.
+ for (MachineRegisterInfo::def_iterator
+ I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ // Find the corresponding slot index.
+ SlotIndex Idx;
+ if (MI->isPHI())
+ // PHI defs begin at the basic block start index.
+ Idx = Indexes->getMBBStartIdx(MI->getParent());
+ else
+ // Instructions are either normal 'r', or early clobber 'e'.
+ Idx = Indexes->getInstructionIndex(MI)
+ .getRegSlot(I.getOperand().isEarlyClobber());
+
+ // Create the def in LI. This may find an existing def.
+ LI->createDeadDef(Idx, *Alloc);
+ }
+}
+
+
+void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all operands that read Reg. This may include partial defs.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervalAnalysis::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
+ if (!MO.readsReg())
+ continue;
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK; extend() is idempotent.
+ const MachineInstr *MI = &*I;
+
+ // Find the SlotIndex being read.
+ SlotIndex Idx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // PHI operands are paired: (Reg, PredMBB).
+ // Extend the live range to be live-out from PredMBB.
+ Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB());
+ } else {
+ // This is a normal instruction.
+ Idx = Indexes->getInstructionIndex(MI).getRegSlot();
+ // Check for early-clobber redefs.
+ unsigned DefIdx;
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ if (MI->getOperand(DefIdx).isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+ }
+ extend(LI, Idx, Reg);
+ }
+}
+
+
+// Transfer information from the LiveIn vector to the live ranges.
+void LiveRangeCalc::updateLiveIns() {
+ LiveRangeUpdater Updater;
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ assert(I->Value && "No live-in value found");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I->Kill.isValid())
+ // Value is killed inside this block.
+ End = I->Kill;
+ else {
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
+ }
+ Updater.setDest(I->LI);
+ Updater.add(Start, End, I->Value);
+ }
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::extend(LiveInterval *LI,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ assert(LI && "Missing live range");
+ assert(Kill.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
+ assert(KillMBB && "No MBB at Kill");
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ return;
+
+ // Find the single reaching def, or determine if Kill is jointly dominated
+ // by multiple values, in which case new phi-defs may be needed to preserve
+ // VNInfo SSA form. Search all predecessor blocks until the dominating
+ // VNInfo is known.
+ if (findReachingDefs(LI, KillMBB, Kill, PhysReg))
+ return;
+
+ // When there were multiple different values, we may need new PHIs.
+ calculateValues();
+}
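+
+// For example (sketch): extending to a use in block B either finds an
+// earlier def within B (extendInBlock succeeds), or walks predecessors
+// backwards until every path reaches a block with a known live-out value;
+// if distinct values are found, updateSSA() inserts the necessary phi-defs.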
+
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique-value optimization is not
+// available here; SplitEditor::transferValues handles that case directly
+// anyway.
+void LiveRangeCalc::calculateValues() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA();
+ updateLiveIns();
+}
+
+
+bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ unsigned KillMBBNum = KillMBB->getNumber();
+
+ // Block numbers where LI should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LiveOut[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it as null if Pred is live-through with an unknown value.
+ VNInfo *VNI = LI->extendInBlock(Start, End);
+ setLiveOutValue(Pred, VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+
+ // No, we need a live-in value for Pred as well.
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred->getNumber());
+ else
+ // Loopback to KillMBB, so value is really live through.
+ Kill = SlotIndex();
+ }
+ }
+
+ LiveIn.clear();
+
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+ // neither requires it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ LiveRangeUpdater Updater(LI);
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(*I);
+ // Trim the live range in KillMBB.
+ if (*I == KillMBBNum && Kill.isValid())
+ End = Kill;
+ else
+ LiveOut[MF->getBlockNumbered(*I)] =
+ LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+ // where updateSSA() will use it as its work list.
+ LiveIn.reserve(WorkList.size());
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ addLiveInBlock(LI, DomTree->getNode(MBB));
+ if (MBB == KillMBB)
+ LiveIn.back().Kill = Kill;
+ }
+
+ return false;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ // Iterate until convergence.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // Do we need a live-in value for a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOut[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && !IDomValue.second)
+ LiveOut[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair &Value = LiveOut[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extend(). In that case the block's Seen bit is set,
+ // and LiveOut holds a foreign or missing value.
+ LiveOutPair &LOP = LiveOut[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+ VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
+ I->Value = VNI;
+ // This block is done; we know the final value.
+ I->DomNode = 0;
+
+ // Add liveness since updateLiveIns now skips this node.
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I->Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changes);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 0000000..57cab7b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,242 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges. SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGECALC_H
+#define LLVM_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+ const MachineFunction *MF;
+ const MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ VNInfo::Allocator *Alloc;
+
+ /// Seen - Bit vector of active entries in LiveOut, also used as a visited
+ /// set by findReachingDefs. One entry per basic block, indexed by block
+ /// number. This is kept as a separate bit vector because it can be cleared
+ /// quickly when switching live ranges.
+ BitVector Seen;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant; it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ /// LiveOut - Map each basic block where a live range is live out to the
+ /// live-out value and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.test(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second->getBlock() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passes through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null; it can be computed when needed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap LiveOut;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // LI - The live range that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveInterval *LI;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
+ : LI(li), DomNode(node), Kill(kill), Value(0) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
+ /// of defs that can reach it.
+ ///
+ /// If only one def can reach Kill, all paths from the def to Kill are added
+ /// to LI, and the function returns true.
+ ///
+ /// If multiple values can reach Kill, the blocks that need LI to be live in
+ /// are added to the LiveIn array, and the function returns false.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ bool findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA();
+
+ /// Add liveness as specified in the LiveIn vector.
+ void updateLiveIns();
+
+public:
+ LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *MF,
+ SlotIndexes*,
+ MachineDominatorTree*,
+ VNInfo::Allocator*);
+
+ /// calculate - Calculate the live range of a virtual register from its defs
+ /// and uses. LI must be empty with no values.
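+ ///
+ /// As a sketch of typical high-level use (MF, Indexes, DomTree, Alloc and
+ /// LI here stand for the caller's own objects):
+ /// \code
+ ///   LiveRangeCalc LRC;
+ ///   LRC.reset(MF, Indexes, DomTree, Alloc);
+ ///   LRC.calculate(LI); // LI now covers all defs and uses of LI->reg.
+ /// \endcode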
+ void calculate(LiveInterval *LI);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// extend - Extend the live range of LI to reach Kill.
+ ///
+ /// The existing values in LI must be live so they jointly dominate Kill. If
+ /// Kill is not dominated by a single existing value, PHI-defs are inserted
+ /// as required to preserve SSA form. If Kill is known to be dominated by a
+ /// single existing value, Alloc may be null.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
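+ ///
+ /// For example, extending LI to cover a use at a caller-provided UseIdx
+ /// might look like: LRC.extend(LI, UseIdx.getRegSlot());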
+ void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveInterval *LI, unsigned Reg);
+
+ /// createDeadDefs - Create a dead def in LI for every def of LI->reg.
+ void createDeadDefs(LiveInterval *LI) {
+ createDeadDefs(LI, LI->reg);
+ }
+
+ /// extendToUses - Extend the live range of LI to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveInterval *LI, unsigned Reg);
+
+ /// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
+ void extendToUses(LiveInterval *LI) {
+ extendToUses(LI, LI->reg);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
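+ // A minimal sketch of that protocol, where DefMBB/VNI and UseMBB/KillIdx
+ // stand for values the caller already knows:
+ //
+ //   LRC.reset(MF, Indexes, DomTree, Alloc);
+ //   LRC.setLiveOutValue(DefMBB, VNI);
+ //   LRC.addLiveInBlock(LI, DomTree->getNode(UseMBB), KillIdx);
+ //   LRC.calculateValues();
+ //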
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB, the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LI.
+ ///
+ /// @param LI The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LI is killed. If the value is
+ /// live-through, set Kill = SlotIndex() and also call
+ /// setLiveOutValue(MBB, 0).
+ void addLiveInBlock(LiveInterval *LI,
+ MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+ /// setLiveOutValue; the value may be null for live-through blocks.
+ void calculateValues();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 0000000..7793e96
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,387 @@
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ if (VRM) {
+ VRM->grow();
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+ NewRegs.push_back(&LI);
+ return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ ScannedRemattable = true;
+ if (!TII.isTriviallyReMaterializable(DefMI, aa))
+ return false;
+ Remattable.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = getParent().vni_begin(),
+ E = getParent().vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(VNI, DefMI, aa);
+ }
+ ScannedRemattable = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
+ if (!ScannedRemattable)
+ scanRemattable(aa);
+ return !Remattable.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) const {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ // We can't remat physreg uses, unless the physreg is a constant.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+ continue;
+ return false;
+ }
+
+ LiveInterval &li = LIS.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove) {
+ assert(ScannedRemattable && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!Remattable.count(RM.ParentVNI))
+ return false;
+
+ // If the caller didn't provide the defining instruction, look it up.
+ SlotIndex DefIdx;
+ if (RM.OrigMI)
+ DefIdx = LIS.getInstructionIndex(RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ Rematted.insert(RM.ParentVNI);
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getRegSlot();
+}
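+
+// The rematerialization entry points above are meant to be used together.
+// Roughly, a spiller might do the following (a sketch; AA, ParentVNI, UseIdx,
+// MBB, MI, NewReg and TRI stand for values from the caller's context):
+//
+//   if (Edit.anyRematerializable(AA)) {
+//     LiveRangeEdit::Remat RM(ParentVNI);
+//     if (Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/false))
+//       Edit.rematerializeAt(MBB, MI, NewReg, RM, TRI);
+//   }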
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead) {
+ MachineInstr *DefMI = 0, *UseMI = 0;
+
+ // Check that there is a single def and a single use.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI,
+ LIS.getInstructionIndex(DefMI),
+ LIS.getInstructionIndex(UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, 0);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled) {
+ SetVector<LiveInterval*,
+ SmallVector<LiveInterval*, 8>,
+ SmallPtrSet<LiveInterval*, 8> > ToShrink;
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty()) {
+ MachineInstr *MI = Dead.pop_back_val();
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
+
+ // Check for live intervals that may shrink.
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ ReadsPhysRegs = true;
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if (MI->readsVirtualRegister(Reg) &&
+ (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+ LI.killedAt(Idx)))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ LI.removeValNo(VNI);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
+ }
+ }
+ }
+
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ unsigned Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
+ }
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead))
+ continue;
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues(LIS);
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ continue;
+ ++NumFracRanges;
+ bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
+ DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+ SmallVector<LiveInterval*, 8> Dups(1, LI);
+ for (unsigned i = 1; i != NumComp; ++i) {
+ Dups.push_back(&createFrom(LI->reg));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM->setIsSplitFromReg(Dups.back()->reg, 0);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ }
+ ConEQ.Distribute(&Dups[0], MRI);
+ DEBUG({
+ for (unsigned i = 0; i != NumComp; ++i)
+ dbgs() << '\t' << *Dups[i] << '\n';
+ });
+ }
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ const MachineLoopInfo &Loops) {
+ VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ LiveInterval &LI = **I;
+ if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << MRI.getRegClass(LI.reg)->getName() << '\n');
+ VRAI.CalculateWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 0000000..0ef069f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,154 @@
+//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
+ UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+ Queries[i].clear();
+ }
+}
+
+void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].unify(VirtReg);
+ }
+ ++NumAssigned;
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].extract(VirtReg);
+ }
+ ++NumUnassigned;
+ DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg;
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine-grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes()))
+ return true;
+ return false;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+ unsigned RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (query(VirtReg, *Units).checkInterference())
+ return IK_VirtReg;
+
+ return IK_Free;
+}
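+
+// In a register allocator, checkInterference() is typically the probe and
+// assign() the commit. A sketch, where Order stands for a caller-provided
+// list of candidate physical registers:
+//
+//   for (unsigned i = 0, e = Order.size(); i != e; ++i) {
+//     unsigned PhysReg = Order[i];
+//     if (Matrix->checkInterference(VirtReg, PhysReg) == LiveRegMatrix::IK_Free) {
+//       Matrix->assign(VirtReg, PhysReg);
+//       break;
+//     }
+//   }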
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 0000000..be11a8f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,86 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << RC->getName() << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..789eddc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,826 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. It computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
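+// For example, in the sketch below %v0 is killed at the ADD (read there and
+// never used again) and %v1 is dead at the ADD (defined there and never
+// read):
+//
+//   %v1 = ADD %v0, %v0
+//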
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+#endif
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(RegIdx);
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs)
+ PartDefRegs.insert(*SubRegs);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def, all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PhysRegUse[*SubRegs] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def; keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def; keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark the register def dead and add implicit defs of the
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, the EAX def is dead but the AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PhysRegUse[*SS] = LastRefOrPartRef;
+ }
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PartUses.erase(*SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, 0);
+ }
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Live.insert(*SubRegs);
+ } else {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ // If a register isn't itself defined, but all of the parts that make it
+ // up are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
+
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+        if (!TRI->isInAllocatableClass(LReg))
+          // Remember reserved registers that are live out of this block so
+          // the end-of-block kill loop below leaves them alive.
+          LiveOuts.insert(LReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, 0, Defs);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If there are, it is a bug in the instruction selector or some
+  // other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
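+
+// Note on the kill/dead conversion loop above: if a recorded kill of a
+// virtual register is its own defining instruction, the value is never read,
+// so the def gets a <dead> marker; otherwise the last reading instruction
+// gets a <kill> marker. E.g. (debug-print syntax, illustrative):
+//
+//   %vreg5<def,dead> = ADD32ri %vreg4<kill>, 1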
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i).readsReg())
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
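+
+// The i += 2 stride above relies on the standard PHI operand layout, e.g.
+// (debug-print syntax):
+//
+//   %vreg2<def> = PHI %vreg0, <BB#0>, %vreg1, <BB#1>
+//
+// Operand i is a register use and operand i+1 is the predecessor it flows in
+// from, so each use is recorded against that predecessor's block number.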
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ SmallSet<unsigned, 16> Defs, Kills;
+
+ MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+ for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+ // Record the def of the PHI node.
+ Defs.insert(BBI->getOperand(0).getReg());
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+ }
+
+ // Record all vreg defs and kills of all instructions in SuccBB.
+ for (; BBI != BBE; ++BBI) {
+ for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+ E = BBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isDef())
+ Defs.insert(I->getReg());
+ else if (I->isKill())
+ Kills.insert(I->getReg());
+ }
+ }
+ }
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+    // If the register is defined in the successor, it can't be live in BB.
+ if (Defs.count(Reg))
+ continue;
+
+ // If the register is either killed in or live through SuccBB it's also live
+ // through BB.
+ VarInfo &VI = getVarInfo(Reg);
+ if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000..352ef94
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,356 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
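+//
+// For example (illustrative, target-dependent): if a load instruction only
+// encodes a 12-bit offset and a local would land at SP+70000, the pass places
+// the local in the pre-allocated block and asks the target to materialize
+//
+//   %vreg1<def> = <target add> SP, #69632   ; virtual base register
+//   ... = <target load> %vreg1, #368        ; offset now fits in 12 bits
+//
+// so each nearby reference reuses %vreg1 instead of synthesizing the large
+// offset itself.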
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
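+
+  // FrameRefs are collected for the whole function and sorted ascending by
+  // LocalOffset via operator< (see array_pod_sort below), so e.g. references
+  // at offsets {-1024, -16, -8} are visited in that order and nearby ones can
+  // share a base register.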
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+  LocalOffsets.resize(LocalObjectCount);
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+  // Tell MFI whether any base registers were allocated. PEI will only want to
+  // use the local block allocations from this pass if there were any.
+  // Otherwise, PEI can do a slightly better job of getting the alignment right
+  // without a hole at the start, since it knows the alignment of the stack at
+  // the start of local allocation and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
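+
+// Worked example of the computation above (illustrative): with StackGrowsDown,
+// an 8-byte object of 16-byte alignment arriving at Offset == 4:
+//
+//   Offset += 8;                        // 4 + 8 == 12
+//   Offset = (12 + 16 - 1) / 16 * 16;   // round up to 16
+//   LocalOffset = -Offset;              // object assigned local offset -16
+//
+// and layout continues from Offset == 16.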
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
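+
+// The resulting layout order is: stack protector slot first, then objects the
+// target says may need protection (kept adjacent to the guard), then all
+// remaining objects. E.g. {guard, char buf[64], int x} rather than letting
+// buf drift away from the guard.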
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+    // Check if the relative offset from where the base register points to the
+    // target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
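+
+// Illustrative use of the helper above (hypothetical numbers): with a
+// downward-growing stack, a 128-byte local block, and a base register
+// materialized at base offset 0, a reference to local offset -8 resolves to
+//
+//   Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second
+//          = 128 + (-8) - 0 == 120
+//
+// and the register is reusable iff the target reports
+// isFrameOffsetLegal(MI, 120).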
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = Fn.begin();
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..898e165
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,1181 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getSymbol - Return the MCSymbol for this basic block.
+///
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) + "_" +
+ Twine(getNumber()));
+}
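+
+// For example, with an ELF-style private prefix of ".L", basic block 7 of
+// function number 2 gets the symbol ".LBB2_7"; the actual prefix comes from
+// the target's MCAsmInfo.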
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
+ ++I;
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+    // Skip debug values and instructions inside bundles so that the
+    // instruction returned is the one that starts the bundle.
+    if (I->isDebugValue() || I->isInsideBundle())
+      continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+    // Skip debug values and instructions inside bundles so that the
+    // instruction returned is the one that starts the bundle.
+    if (I->isDebugValue() || I->isInsideBundle())
+      continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
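+
+// E.g. block number 3 of function "foo" prints as "foo:if.then" when its IR
+// block is named "if.then", and as "foo:BB3" when it has no IR block.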
+
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment)
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Live Ins:";
+ for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
+ OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
+ OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ std::vector<unsigned>::iterator I =
+ std::find(LiveIns.begin(), LiveIns.end(), Reg);
+ if (I != LiveIns.end())
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch. First we have to locate the
+ // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no
+ // fall-through edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ // Walk through the successors and find the successor which is not
+ // a landing pad and is not the conditional branch destination (in TBB)
+ // as the fallthrough successor.
+ MachineBasicBlock *FallthroughBB = 0;
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad() || *SI == TBB)
+ continue;
+ assert(!FallthroughBB && "Found more than one fallthrough successor.");
+ FallthroughBB = *SI;
+ }
+ if (!FallthroughBB && canFallThrough()) {
+ // We fallthrough to the same basic block as the conditional jump
+ // targets. Remove the conditional jump, leaving unconditional
+ // fallthrough.
+ // FIXME: This does not seem like a reasonable pattern to support, but it
+ // has been seen in the wild coming out of degenerate ARM test cases.
+ TII->RemoveBranch(*this);
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ return;
+ }
+
+ // The block has a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ } else if (!isLayoutSuccessor(FallthroughBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
+ }
+ }
+ }
+}
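+
+// A typical sequence (block numbers illustrative): after splitting the
+// critical edge BB#1 -> BB#3, the new block is laid out between them and
+// updateTerminator() is called on BB#1; it then removes a now-redundant
+// unconditional branch if the new block became BB#1's layout successor, or
+// inserts one if it did not.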
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+ // If we see non-zero value for the first time it means we actually use Weight
+ // list, so we fill all Weights with 0's.
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ if (Old == New)
+ return;
+
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
+ }
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
+
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
+}
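+
+// E.g. replaceSuccessor(Old, New) when New is already a successor with edge
+// weights {Old: 3, New: 5} leaves a single New edge of weight 8 rather than
+// two parallel edges.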
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t Weight = 0;
+
+ // If Weight list is empty it means we don't use it (disabled optimization).
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, Weight);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
+}
+
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicated check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(&back());
+ }
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isLandingPad())
+ return NULL;
+
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+  // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jump to the same MBB in either case. That leaves duplicate CFG edges we
+  // can't handle. Since this never happens in properly optimized code, just
+  // skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "Splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
+ SmallVector<unsigned, 4> UsedRegs;
+ if (LIS) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0)
+ continue;
+
+ unsigned Reg = OI->getReg();
+ if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(I);
+ }
+
+ updateTerminator();
+
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(*I);
+ }
+ }
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+
+ if (Indexes) {
+ for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
+ I != E; ++I) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(I))
+ Indexes->removeMachineInstrFromMaps(I);
+ Indexes->insertMachineInstrInMaps(I);
+ }
+ }
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ // Inherit live-ins from the successor
+ for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
+ E = Succ->livein_end(); I != E; ++I)
+ NMBB->addLiveIn(*I);
+
+ // Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all live
+ // intervals will end prior to the beginning of the new split block. If the
+ // original block was not at the end of the function, all live intervals will
+ // extend to the end of the new split block.
+
+ bool isLastMBB =
+ llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "PHI sources should be live out of their predecessors.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeRange(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
+ }
+
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
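+
+// Typical use from a pass (sketch; PHI, liveness, dominator, and loop updates
+// all happen inside SplitCriticalEdge itself):
+//
+//   if (MachineBasicBlock *NMBB = MBB->SplitCriticalEdge(Succ, this)) {
+//     // Insert copies or spill code in NMBB.
+//   } else {
+//     // Edge was not splittable (landing pad, unanalyzable branch, or
+//     // degenerate duplicate edge).
+//   }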
+
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
+}
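+
+// For example, in a three-instruction bundle {A, B, C}: erasing A (the head)
+// takes the first branch and makes B the new head; erasing C (the tail) takes
+// the second branch and makes B the new tail; erasing B takes neither branch,
+// since A and C stay bundled with each other through their own flags.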
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(I);
+ return Insts.erase(I);
+}
+
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
+ }
+ return Insts.insert(I, MI);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
+ --I;
+ if (!I->isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+  // The values of DestA and DestB frequently come from a call to the
+  // 'TargetInstrInfo::AnalyzeBranch' method, so we interpret their initial
+  // values accordingly:
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // with a conditional branch that falls through to a successor (DestB).
+  // 4. If DestA and DestB are set and isCond is true, then the block ends with
+  //    a conditional branch followed by an unconditional branch. DestA is the
+  //    'true' destination and DestB is the 'false' destination.
+
+ bool Changed = false;
+
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
+
+ if (DestA == 0 && DestB == 0) {
+ // Block falls through to successor.
+ DestA = FallThru;
+ DestB = FallThru;
+ } else if (DestA != 0 && DestB == 0) {
+ if (isCond)
+ // Block ends in conditional jump that falls through to successor.
+ DestB = FallThru;
+ } else {
+ assert(DestA && DestB && isCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
+ }
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (!SeenMBBs.insert(MBB) ||
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ // This is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ Changed = true;
+ } else {
+ ++SI;
+ }
+ }
+
+ return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
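+///
+/// E.g. (illustrative use, not part of this change): a pass inserting code at
+/// the top of a block might seed the new instruction's location with
+/// findDebugLoc(MBB.instr_begin()).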
+DebugLoc
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+ DebugLoc DL;
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug values; we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ ++MBBI;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
+ return DL;
+}
+
+/// getSuccWeight - Return weight of the edge from this block to MBB.
+///
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
+ if (Weights.empty())
+ return 0;
+
+ return *getWeightIterator(Succ);
+}
+
+/// getWeightIterator - Return weight iterator corresponding to the I successor
+/// iterator.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+/// getWeightIterator - Return weight iterator corresponding to the I successor
+/// iterator.
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// The search is localized to a neighborhood of Neighborhood instructions
+/// before MI (searching for defs or kills) and Neighborhood instructions
+/// after MI (searching just for defs).
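+///
+/// E.g. (illustrative, not from this change): a backend could check whether a
+/// status register is free just before MI with a call such as
+/// computeRegisterLiveness(TRI, FlagsReg, MI, 10), where FlagsReg is a
+/// hypothetical physical register, and only reuse it on LQR_Dead.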
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ unsigned Reg, MachineInstr *MI,
+ unsigned Neighborhood) {
+ unsigned N = Neighborhood;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Start by searching backwards from MI, looking for kills, reads or defs.
+
+ MachineBasicBlock::iterator I(MI);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != MBB->begin()) {
+ do {
+ --I;
+
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.Defines)
+ // Outputs happen after inputs so they take precedence if both are
+ // present.
+ return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+ if (Analysis.Kills || Analysis.Clobbers)
+ // Register killed, so it isn't live.
+ return LQR_Dead;
+
+ else if (Analysis.ReadsOverlap)
+ // Defined or read without a previous kill - live.
+ return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
+
+ } while (I != MBB->begin() && --N > 0);
+ }
+
+ // Did we get to the start of the block?
+ if (I == MBB->begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+ RAI.isValid(); ++RAI) {
+ if (MBB->isLiveIn(*RAI))
+ return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+ }
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Try searching forwards from MI, looking for reads or defs.
+ I = MachineBasicBlock::iterator(MI);
+ // If this is the last insn in the block, don't search forwards.
+ if (I != MBB->end()) {
+ for (++I; I != MBB->end() && N > 0; ++I, --N) {
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.ReadsOverlap)
+ // Used, therefore must have been live.
+ return (Analysis.Reads) ?
+ LQR_Live : LQR_OverlappingLive;
+
+ else if (Analysis.Clobbers || Analysis.Defines)
+ // Defined (but not read) therefore cannot have been live.
+ return LQR_Dead;
+ }
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 0000000..070daf2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,61 @@
+//===-- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
+ delete MBFI;
+}
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getBlockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Note that the entry block's initial frequency is 1024, so the
+/// value is meaningful only relative to other block frequencies, not in
+/// absolute terms. We do this to avoid using floating point.
+///
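+/// For example (illustrative numbers, not from this change): if the entry
+/// block reports 1024 and a loop body reports 8192, the loop body is roughly
+/// 8x hotter than the entry; neither number means anything on its own.
+///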
+BlockFrequency MachineBlockFrequencyInfo::
+getBlockFreq(const MachineBasicBlock *MBB) const {
+ return MBFI->getBlockFreq(MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 0000000..cd948e2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1165 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the data structure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
+///
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
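+ ///
+ /// E.g. (illustrative): merging a chain [C, D] into a chain [A, B] yields
+ /// [A, B, C, D] and repoints BlockToChain[C] and BlockToChain[D] at the
+ /// merged chain.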
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
+ BI != BE; ++BI) {
+ Blocks.push_back(*BI);
+ assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
+ BlockToChain[*BI] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ void dump() LLVM_ATTRIBUTE_USED {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors within the loop currently being processed.
+ ///
+ /// This count is updated at each loop we process to represent the number of
+ /// in-loop predecessors of this chain.
+ unsigned LoopPredecessors;
+};
+}
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+ /// \brief A handle to the loop info.
+ const MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLoweringBase *TLI;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void buildCFGChains(MachineFunction &F);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber()
+ << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+
+/// \brief Helper to print the number of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockNum(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer predecessor.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
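+///
+/// E.g. (illustrative): when chain [A, B] is placed and B's successor C had
+/// LoopPredecessors == 1 on its chain, the count drops to zero and the head
+/// of C's chain is pushed onto the worklist.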
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
+ CBI != CBE; ++CBI) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
+ SE = (*CBI)->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*SuccChain.begin());
+ }
+ }
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
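+///
+/// E.g. (illustrative numbers): with successor edge weights 8 and 2, the
+/// hotter edge has probability 8/10 = 80%, which meets the HotProb(4, 5)
+/// threshold used below and may justify breaking CFG structure to place that
+/// successor next.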
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+ // improve the MBPI interface to efficiently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which, in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
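+///
+/// E.g. (illustrative numbers): among unplaced candidates with block
+/// frequencies {1024, 4096}, the 4096-frequency block is chosen so the
+/// hottest code stays physically close to the rest of the loop body.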
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Once we need to walk the worklist looking for a candidate, clean up the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from
+/// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB,
+ BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ BB = *llvm::prior(Chain.end());
+ for (;;) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ assert(*llvm::prior(Chain.end()) == BB);
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+ << " to " << getBlockNum(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *llvm::prior(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge), rotating it in front of the loop
+/// header is always profitable.
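+///
+/// Illustrative shape (not from this change): a loop [header -> body ->
+/// latch -> header] whose latch's only successor is the backedge. Placing
+/// the latch above the header turns its unconditional backedge jump into a
+/// fallthrough.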
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Check that the header hasn't been fused with a preheader block due to
+ // crazy branches. If it has, we need to start with the header at the top to
+ // prevent pulling the preheader into the loop body.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ DEBUG(dbgs() << "Finding best loop top for: "
+ << getBlockName(L.getHeader()) << "\n");
+
+ BlockFrequency BestPredFreq;
+ MachineBasicBlock *BestPred = 0;
+ for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
+ PE = L.getHeader()->pred_end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ << Pred->succ_size() << " successors, "
+ << MBFI->getBlockFreq(Pred) << " freq\n");
+ if (Pred->succ_size() > 1)
+ continue;
+
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ if (!BestPred || PredFreq > BestPredFreq ||
+ (!(PredFreq < BestPredFreq) &&
+ Pred->isLayoutSuccessor(L.getHeader()))) {
+ BestPred = Pred;
+ BestPredFreq = PredFreq;
+ }
+ }
+
+ // If no direct predecessor is fine, just use the loop header.
+ if (!BestPred)
+ return L.getHeader();
+
+ // Walk backwards through any straight line of predecessors.
+ while (BestPred->pred_size() == 1 &&
+ (*BestPred->pred_begin())->succ_size() == 1 &&
+ *BestPred->pred_begin() != L.getHeader())
+ BestPred = *BestPred->pred_begin();
+
+ DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ return BestPred;
+}
+
+
+/// \brief Find the best loop exiting block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// exiting block, which rotateLoop can then move to the bottom of the loop.
+/// Typically this is done to maximize fallthrough opportunities out of the
+/// loop.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+ // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return 0;
+
+ BlockFrequency BestExitEdgeFreq;
+ unsigned BestExitLoopDepth = 0;
+ MachineBasicBlock *ExitingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ bool HasLoopingSucc = false;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights and manually compute the
+ // probabilities to avoid quadratic behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain) {
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ HasLoopingSucc = true;
+ continue;
+ }
+
+ unsigned SuccLoopDepth = 0;
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ SuccLoopDepth = ExitLoop->getLoopDepth();
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " [L:" << SuccLoopDepth
+ << "] (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
+ ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!HasLoopingSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return 0;
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return 0;
+
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ return ExitingBB;
+}
+
+/// \brief Attempt to rotate an exiting block to the bottom of the loop.
+///
+/// Once we have built a chain, try to rotate it to line up the hot exit block
+/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
+/// branches. For example, if the loop has fallthrough into its header and out
+/// of its bottom already, don't rotate it.
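+///
+/// Illustratively, rotating the chain [Header, A, Exiting, B] at the exiting
+/// block yields [B, Header, A, Exiting], leaving the exit at the bottom where
+/// it can fall through out of the loop.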
+void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
+ MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet) {
+ if (!ExitingBB)
+ return;
+
+ MachineBasicBlock *Top = *LoopChain.begin();
+ bool ViableTopFallthrough = false;
+ for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
+ PE = Top->pred_end();
+ PI != PE; ++PI) {
+ BlockChain *PredChain = BlockToChain[*PI];
+ if (!LoopBlockSet.count(*PI) &&
+ (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
+ ViableTopFallthrough = true;
+ break;
+ }
+ }
+
+ // If the header has viable fallthrough, check whether the current loop
+ // bottom is a viable exiting block. If so, bail out as rotating will
+ // introduce an unnecessary branch.
+ if (ViableTopFallthrough) {
+ MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
+ for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
+ SE = Bottom->succ_end();
+ SI != SE; ++SI) {
+ BlockChain *SuccChain = BlockToChain[*SI];
+ if (!LoopBlockSet.count(*SI) &&
+ (!SuccChain || *SI == *SuccChain->begin()))
+ return;
+ }
+ }
+
+ BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
+ ExitingBB);
+ if (ExitIt == LoopChain.end())
+ return;
+
+ std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
+}
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
+ MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+
+ // First check to see if there is an obviously preferable top block for the
+ // loop. This will default to the header, but may end up as one of the
+ // predecessors to the header if there is one which will result in strictly
+ // fewer branches in the loop body.
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+
+ // If we selected just the header for the loop top, look for a potentially
+ // profitable exit block in the event that rotating the loop can eliminate
+ // branches by placing an exit edge at the bottom.
+ MachineBasicBlock *ExitingBB = 0;
+ if (LoopTop == L.getHeader())
+ ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+
+ BlockChain &LoopChain = *BlockToChain[LoopTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+ for (MachineLoop::block_iterator BI = L.block_begin(),
+ BE = L.block_end();
+ BI != BE; ++BI) {
+ BlockChain &Chain = *BlockToChain[*BI];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
+ BCI != BCE; ++BCI) {
+ dbgs() << " ... " << getBlockName(*BCI) << "\n";
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
+ LBI != LBE; ++LBI)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+}
+
+void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *Chain
+ = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
+ ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain &Chain = *BlockToChain[BB];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain)
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ BlockChain &FunctionChain = *BlockToChain[&F.front()];
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
+
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ FunctionBlockSet.insert(FI);
+
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
+ BCI != BCE; ++BCI)
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F.begin();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(*BI) << "\n");
+ if (InsertPos != MachineFunction::iterator(*BI))
+ F.splice(InsertPos, *BI);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (BI == FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+ // FIXME: It would be awesome if updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed, in order to remove this
+ // boilerplate.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: there is nowhere to get a DebugLoc from here.
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
+ PrevBB->updateTerminator();
+ }
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+ F.back().updateTerminator();
+
+ // Walk through the backedges of the function now that we have fully laid out
+ // the basic blocks and align the destination of each backedge. We don't rely
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
+ if (F.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ return;
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
+
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
+ }
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = F.getTarget().getInstrInfo();
+ TLI = F.getTarget().getTargetLowering();
+ assert(BlockToChain.empty());
+
+ buildCFGChains(F);
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
+ Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
+ : NumUncondBranches;
+ Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
+ : UncondBranchTakenFreq;
+ for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end();
+ SI != SE; ++SI) {
+ // Skip if this successor is a fallthrough.
+ if (I->isLayoutSuccessor(*SI))
+ continue;
+
+ BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 0000000..ae70912
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,126 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+void MachineBranchProbabilityInfo::anchor() { }
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
+ // First we compute the sum with 64 bits of precision, ensuring that it
+ // cannot overflow by bounding the number of weights considered. Hopefully
+ // no one actually needs 2^32 successors.
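+ //
+ // Worked example (illustrative numbers): two successors each weighted
+ // 3,000,000,000 sum to 6e9 > UINT32_MAX, so Scale becomes 2 below and the
+ // halved weights re-sum to ~3e9, which fits in 32 bits while preserving the
+ // edge ratios.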
+ assert(MBB->succ_size() < UINT32_MAX);
+ uint64_t Sum = 0;
+ Scale = 1;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ Sum += Weight;
+ }
+
+ // If the computed sum fits in 32-bits, we're done.
+ if (Sum <= UINT32_MAX)
+ return Sum;
+
+ // Otherwise, compute the scale necessary to cause the weights to fit, and
+ // re-sum with that scale applied.
+ assert((Sum / UINT32_MAX) < UINT32_MAX);
+ Scale = (Sum / UINT32_MAX) + 1;
+ Sum = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ Sum += Weight / Scale;
+ }
+ assert(Sum <= UINT32_MAX);
+ return Sum;
+}
+
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, I);
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = *I;
+ }
+ }
+
+ if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
+ return MaxSucc;
+
+ return 0;
+}
+
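+// Illustrative example (not from this change): with outgoing edge weights
+// {6, 2}, the probability of the weight-6 edge is BranchProbability(6, 8).
+// When the 64-bit weight sum overflows 32 bits, both the numerator and the
+// denominator are first divided by the same Scale, so the ratio survives.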
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Scale = 1;
+ uint32_t D = getSumForBlock(Src, Scale);
+ uint32_t N = getEdgeWeight(Src, Dst) / Scale;
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 0000000..61d8d38
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,661 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexprs eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CSes eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ MachineRegisterInfo *MRI;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ virtual void releaseMemory() {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ const unsigned LookAheadLimit;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
+
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
+ bool PerformCSE(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+
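+/// PerformTrivialCoalescing - For each virtual register use of MI that is
+/// fed by a single-use COPY from another virtual register (with no
+/// sub-registers involved), rewrite the use to the copy's source and delete
+/// the copy. Illustrative example (hypothetical vregs):
+///   %vreg1 = COPY %vreg0
+///   %vreg2 = ADD %vreg1, ...
+/// becomes "%vreg2 = ADD %vreg0, ..." and the COPY is erased.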
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // Only coalesce single use copies. This ensures the copy will be
+ // deleted.
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI->isCopy())
+ continue;
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
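+/// isPhysDefTriviallyDead - Scan forward from I for up to LookAheadLimit
+/// instructions (skipping debug values), looking for a reference to Reg.
+/// Return true if a def of Reg (or an alias) or the end of the block is
+/// reached before any use; return false if a use is found or the scan
+/// limit is exhausted.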
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ bool SeenDef = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // Saw a def of Reg (or an alias) before encountering any use; it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers).
+/// The non-dead physical register defs are returned in PhysDefs, and
+/// PhysUseDef is set when the instruction both uses and defines the same
+/// physical register.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const{
+ // First, add all uses to PhysRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before live variables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+ PhysDefs.push_back(Reg);
+ }
+
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
+ return !PhysRefs.empty();
+}
+
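+/// PhysRegDefsReach - Return true if no register in PhysRefs is clobbered
+/// (by a def or a register mask) between CSMI, the available expression,
+/// and MI, scanning at most LookAheadLimit instructions. NonLocal is set
+/// when the scan has to continue from CSMI's block into MI's block.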
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+ // Avoid extending live range of physical registers if they are
+ // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
+ MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I != EE && I->isDebugValue())
+ ++I;
+
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
+ if (I == E)
+ return true;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MI->mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, the
+ // load is invariant and it is safe to CSE it.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI) {
+ // FIXME: Heuristics that work around the lack of live range splitting.
+
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
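+ // Illustrative reasoning: if every instruction reading Reg already reads
+ // CSReg, rewriting Reg's uses to CSReg adds no new overlap between the
+ // two live ranges, so the CSE cannot raise CSReg's register pressure.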
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
+ // Heuristic #1: Don't CSE "cheap" computations if the def is not local or
+ // in an immediate predecessor. We don't want to increase register pressure
+ // and end up causing other computations to be spilled.
+ if (MI->isAsCheapAsAMove()) {
+ MachineBasicBlock *CSBB = CSMI->getParent();
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristic #2: If the expression does not use a virtual register and the
+ // only uses of the redundant computation are copies, do not CSE.
+ bool HasVRegUse = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ // Ignore copies.
+ if (!Use->isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristic #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
+ bool HasPHI = false;
+ SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ HasPHI |= Use->isPHI();
+ CSBBs.insert(Use->getParent());
+ }
+
+ if (!HasPHI)
+ return true;
+ return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI->isCopyLike())
+ continue;
+ FoundCSE = VNT.count(MI);
+ }
+ }
+
+ // Commute commutable instructions.
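+ // Illustrative example (hypothetical operands): "%y = ADD %b, %a" can
+ // only match an earlier "%x = ADD %a, %b" after being commuted, so
+ // commuting may expose a CSE opportunity that hashing the original
+ // operand order would miss.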
+ bool Commuted = false;
+ if (!FoundCSE && MI->isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
+ SmallSet<unsigned, 8> PhysRefs;
+ SmallVector<unsigned, 2> PhysDefs;
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
+ FoundCSE = false;
+
+ // ... Unless the CS is local or is in the sole predecessor block,
+ // and neither its physical register defs nor its physical register
+ // uses are clobbered between the CS and MI. This can never be the
+ // case if the instruction both uses and defines the same physical
+ // register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+
+ // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
+ continue;
+ }
+
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ // Don't perform CSE if the result of the old instruction cannot exist
+ // within the register class of the new instruction.
+ const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+ if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+ MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+ MRI->clearKillFlags(CSEPairs[i].second);
+ }
+
+ // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ }
+ CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose children are all done.
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ OpenChildren[Node] = NumChildren;
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
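+ // Blocks are visited in dominator-tree pre-order, so when ProcessBlock
+ // runs on a block, VNT holds exactly the expressions made available by
+ // its dominators: EnterScope pushes a hash-table scope and ExitScopeIfDone
+ // pops it once the block's entire dominator subtree is finished.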
+ bool Changed = false;
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren);
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp
new file mode 100644
index 0000000..81b4978
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+using namespace llvm;
+
+void MachineCodeEmitter::anchor() { }
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 0000000..dc8a224
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,334 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SmallVector<unsigned, 4> DestList;
+ typedef DenseMap<unsigned, DestList> SourceMap;
+
+ void SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ void removeCopy(MachineInstr *MI);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SourceMap::iterator SI = SrcMap.find(*AI);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ }
+}
+
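+/// NoInterveningSideEffect - Return true if CopyMI and MI are in the same
+/// basic block and no instruction between them has unmodeled side effects,
+/// is a call, or is a terminator. Used below to show that a reserved
+/// register's value is unchanged between the two copies.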
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+ const MachineInstr *MI) {
+ const MachineBasicBlock *MBB = CopyMI->getParent();
+ if (MI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CopyMI;
+ MachineBasicBlock::const_iterator E = MBB->end();
+ MachineBasicBlock::const_iterator E2 = MI;
+
+ ++I;
+ while (I != E && I != E2) {
+ if (I->hasUnmodeledSideEffects() || I->isCall() ||
+ I->isTerminator())
+ return false;
+ ++I;
+ }
+ return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is,
+/// if the destination of the copy is the same as the source of the earlier
+/// copy that supplied the copied value. When sub-registers are involved,
+/// the sub-indices must also match. e.g.
+/// ecx = mov eax
+/// al = mov cl
+/// But not
+/// ecx = mov eax
+/// al = mov ch
+static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (Def == SrcSrc)
+ return true;
+ if (TRI->isSubRegister(SrcSrc, Def)) {
+ unsigned SrcDef = CopyMI->getOperand(0).getReg();
+ unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
+ if (!SubIdx)
+ return false;
+ return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
+ }
+
+ return false;
+}
+
+// Remove MI from the function because it has been determined it is dead.
+// Turn it into a noop KILL instruction if it has super-register liveness
+// adjustments.
+void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
+ if (MI->getNumOperands() == 2)
+ MI->eraseFromParent();
+ else
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ SourceMap SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ isNopCopy(CopyMI, Def, Src, TRI)) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // Also avoid eliminating a copy from reserved registers unless the
+ // definition is proven not clobbered. e.g.
+ // %RSP<def> = COPY %RAX
+ // CALL
+ // %RAX<def> = COPY %RSP
+
+ // Clear any kills of Def between CopyMI and MI. This extends the
+ // live range.
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
+ I->clearRegisterKills(Def, TRI);
+
+ removeCopy(MI);
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+ // If Src is defined by a previous copy, it cannot be eliminated.
+ for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
+ CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Def' was previously the source of another copy, then that earlier
+ // copy's source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ // ... Make sure to clear the def maps of aliases first.
+ for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+ }
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember the source that's copied to Def. Once the source is clobbered,
+ // it's no longer available for copy propagation.
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+ SrcMap[Src].end()) {
+ SrcMap[Src].push_back(Def);
+ }
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ int RegMaskOpNum = -1;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ RegMaskOpNum = i;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. It is possible to use the register mask to
+ // prune the available copies, but treat it like a basic block boundary for
+ // now.
+ if (RegMaskOpNum >= 0) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ unsigned Reg = (*DI)->getOperand(0).getReg();
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ continue;
+ removeCopy(*DI);
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ // Clear all data structures as if we were beginning a new basic block.
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+ continue;
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+ }
+
+ // If 'Reg' is previously source of a copy, it is no longer available for
+ // copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+ // If MBB does have successors, conservatively assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
+ removeCopy(*DI);
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..04c8ecb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,59 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+}
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..04321f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,898 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(),
+ TM.Options.RealignStack);
+ if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+ getStackAlignment(AttributeSet::FunctionIndex));
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
+ Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ TM.getTargetLowering()->getPrefFunctionAlignment());
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+ // Do call MachineBasicBlock destructors; they contain std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+
+ InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function; if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, MCID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+ // ~MachineFunction drops whole lists of MachineInstrs without calling their
+ // destructors.
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo, Ranges);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineFunction::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineFunction::getName() const {
+ assert(getFunction() && "No function!");
+ return getFunction()->getName();
+}
+
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << getName() << ": ";
+ if (RegInfo) {
+ OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
+ if (!RegInfo->tracksLiveness())
+ OS << ", not tracking liveness";
+ }
+ OS << '\n';
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << PrintReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << PrintReg(I->second, TRI);
+ if (llvm::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
+ BB->print(OS, Indexes);
+ }
+
+ OS << "\n# End machine code for function " << getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getName().str() + "' function";
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ assert(JumpTableInfo && "No jump tables");
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+ const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
+
+ const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
+ MAI.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// ensureMaxAlignment - Make sure the function is at least Align bytes
+/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!TFI.isStackRealignable() || !RealignOption)
+ assert(Align <= TFI.getStackAlignment() &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
+
+/// CreateStackObject - Create a new statically sized stack object, returning
+/// a nonnegative identifier to represent it.
+///
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
+ Alloca));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateSpillStackObject - Create a new statically sized stack object that
+/// represents a spill slot, returning a nonnegative identifier to represent
+/// it.
+///
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ CreateStackObject(Size, Alignment, true, false);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+/// variable sized object has been created. This must be called whenever a
+/// variable sized object is created, whether or not the index returned is
+/// actually used.
+///
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Align, TFI.getStackAlignment());
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/ false,
+ /*NeedSP*/ false,
+ /*Alloca*/ 0));
+ return -++NumFixedObjects;
+}
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
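+ // e.g. (illustrative): Offset 13 with Align 8 becomes (13+7)/8*8 == 16.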
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const {
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
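+
+// For example (illustrative): on a 64-bit target, an EK_BlockAddress table
+// with three destinations occupies 3 * 8 bytes at pointer ABI alignment,
+// while the same table emitted as EK_LabelDifference32 needs only 3 * 4
+// bytes at 4-byte alignment.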
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+void MachineConstantPoolValue::anchor() { }
+
+Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const DataLayout *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // Constants are uniqued per type and value, so two distinct Constant
+ // pointers of the same type can never hold the same value; reject them
+ // quickly.
+ if (A->getType() == B->getType()) return false;
+
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
+ // For now, only support constants with the same size.
+ uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
+ StoreSize > 128)
+ return false;
+
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // DataLayout.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(A), TD);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(A), TD);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(B), TD);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(B), TD);
+
+ return A == B;
+}
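+
+// Illustrative example (not from the original sources): a float 1.0f and an
+// i32 with value 0x3F800000 have different types but identical 4-byte store
+// images, so after both sides are folded to i32 the pointers compare equal
+// and the two constants may share a single pool slot.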
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. The user must specify the minimum required alignment of
+/// the object in bytes, which must be a power of two.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(const Value*)Constants[i].Val.ConstVal;
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 0000000..35591e1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,57 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
+ FunctionPass(ID), TM(tm), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI && "MMI not around yet??");
+ MMI->setModule(&M);
+ NextFnNum = 0;
+ return false;
+}
+
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++,
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 0000000..674cc80
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,56 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 0000000..fa9c821
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,67 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ OS << "# " << Banner << ":\n";
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
+ "Machine Function Printer", false, false)
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner) {
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..32d0668
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1867 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ SmallContents.RegNo = Reg;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operand as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
+ // register's use/def lists.
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
+ RegInfo->removeRegOperandFromUseList(this);
+
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg_TargetFlags = 0;
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsInternalRead = false;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_RegisterMask:
+ return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress(), MO.getOffset());
+ case MachineOperand::MO_RegisterMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ OS << PrintReg(getReg(), TRI, getSubReg());
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isInternalRead() || isEarlyClobber() || isTied()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ // <def,read-undef> only makes sense when getSubReg() is set.
+ // Don't clutter the output otherwise.
+ if (isUndef() && getSubReg())
+ OS << ",read-undef";
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill()) {
+ if (NeedComma) OS << ',';
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ if (NeedComma) OS << ',';
+ OS << "dead";
+ NeedComma = true;
+ }
+ if (isUndef() && isUse()) {
+ if (NeedComma) OS << ',';
+ OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
+ }
+ if (isTied()) {
+ if (NeedComma) OS << ',';
+ OS << "tied";
+ // A TiedTo field of 15 is the sentinel for an out-of-range tied index,
+ // so only directly encoded indexes are printed.
+ if (TiedTo != 15)
+ OS << unsigned(TiedTo - 1);
+ NeedComma = true;
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy())
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ else
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_RegisterMask:
+ OS << "<regmask>";
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<MCSym=" << *getMCSymbol() << '>';
+ break;
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
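+
+// Sample output of the printer above (illustrative; register names depend on
+// the target): a virtual register def prints as "%vreg5<def>", a killed use
+// as "%vreg5<kill>", a frame index as "<fi#3>", and a global address with an
+// offset as "<ga:@foo+4>".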
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo), Ranges(Ranges) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
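+
+// Encoding example (illustrative): the base alignment is stored above the
+// flag bits as Log2_32(a) + 1, so a = 8 is encoded as the field value 4 and
+// a = 1 as 1, leaving 0 free to mean "no alignment recorded". The value can
+// be decoded as (1 << field) >> 1, which recovers a for any power of two.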
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getValue());
+ ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+ ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (MMO.isNonTemporal())
+ OS << "(nontemporal)";
+
+ return OS;
+}
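+
+// Sample output (illustrative): a volatile 4-byte load of IR value %p at
+// offset 8 from a 16-byte aligned base prints roughly as
+// "Volatile LD4[%p(align=16)+8](align=8)", where the trailing alignment is
+// MinAlign(16, 8) == 8, the offset-adjusted value from getAlignment().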
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
+ if (MCID->ImplicitDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID->ImplicitUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
+ addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+ const DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() +
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
+
+ if (!NoImp)
+ addImplicitDefUseOperands(MF);
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
+ debugLoc(MI.getDebugLoc()) {
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
+
+ // Copy operands.
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MF, MI.getOperand(i));
+
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ MRI.removeRegOperandFromUseList(&Operands[i]);
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ MRI.addRegOperandToUseList(&Operands[i]);
+}
+
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+
+ // Here it would be convenient to call memmove, but that isn't allowed
+ // because MachineOperand has a constructor and so isn't a POD type.
+ if (Dst < Src)
+ for (unsigned i = 0; i != NumOps; ++i)
+ new (Dst + i) MachineOperand(Src[i]);
+ else
+ for (unsigned i = NumOps; i ; --i)
+ new (Dst + i - 1) MachineOperand(Src[i - 1]);
+}
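+
+// For example (illustrative): shifting three operands one slot toward lower
+// addresses copies front-to-back (the Dst < Src branch), while shifting them
+// toward higher addresses copies back-to-front, so a source element is never
+// overwritten before it has been read.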
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
+
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end, everything else goes before the implicit regs.
+ //
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
+ }
+ }
+
+ // OpNo now points at the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ NewMO->Contents.Reg.Prev = 0;
+ // Ignore existing ties. This is not a property that can be copied.
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
+ // The MCID operand information isn't accurate until we start adding
+ // explicit operands. The implicit operands are added first, then the
+ // explicits are inserted before them.
+ if (!isImpReg) {
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (NewMO->isUse()) {
+ int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ tieOperands(DefIdx, OpNo);
+ }
+ // If the register operand is flagged as early-clobber, mark it as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ NewMO->setIsEarlyClobber(true);
+ }
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < getNumOperands() && "Invalid operand number");
+ untieRegOperand(OpNo);
+
+#ifndef NDEBUG
+ // Moving tied operands would break the ties.
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
+
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ unsigned OldNumMemRefs = NumMemRefs;
+
+ unsigned NewNum = NumMemRefs + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+ setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
+ for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle && !MII->isBundle())
+ return false;
+ }
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
+ }
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
+ return true;
+}
+
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove(this);
+}
+
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
+
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+ assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
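+
+// Layout sketch (illustrative): an INLINEASM instruction is laid out as
+//   [asm string][extra-info imm][flag imm][group ops...][flag imm][ops...]
+// so a query for an OpIdx inside the second group returns the index of that
+// group's flag immediate and sets *GroupNo to 1.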
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getParent()->getParent() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getParent()->getParent();
+
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+
+ if (!getOperand(OpIdx).isReg())
+ return NULL;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return NULL;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass(MF);
+
+ return NULL;
+}
+
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
+unsigned MachineInstr::getBundleSize() const {
+ MachineBasicBlock::const_instr_iterator I = this;
+ unsigned Size = 0;
+ while (I->isBundledWithSucc()) {
+ ++Size;
+ ++I;
+ }
+ return Size;
+}
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. It further
+/// tightens the search criteria to a use that kills the register if isKill
+/// is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial <def,undef> doesn't count as reading the register.
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
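+
+// For example (illustrative): a plain use of Reg yields (true, false), a
+// full def yields (false, true), a partial subregister redefine without
+// <undef> yields (true, true), and a <def,undef> subregister write yields
+// (false, true).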
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If Overlap is true, defs of overlapping
+/// registers (and regmask clobbers) are also accepted. If TargetRegisterInfo
+/// is non-null, it also checks for a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
+
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+ MachineOperand &DefMO = getOperand(DefIdx);
+ MachineOperand &UseMO = getOperand(UseIdx);
+ assert(DefMO.isDef() && "DefIdx must be a def operand");
+ assert(UseMO.isUse() && "UseIdx must be a use operand");
+ assert(!DefMO.isTied() && "Def is already tied to another use");
+ assert(!UseMO.isTied() && "Use is already tied to another def");
+
+ if (DefIdx < TiedMax)
+ UseMO.TiedTo = DefIdx + 1;
+ else {
+ // Inline asm can use the group descriptors to find tied operands, but on
+ // normal instruction, the tied def must be within the first TiedMax
+ // operands.
+ assert(isInlineAsm() && "DefIdx out of range");
+ UseMO.TiedTo = TiedMax;
+ }
+
+ // UseIdx can be out of range, we'll search for it in findTiedOperandIdx().
+ DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
+}
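+
+// Encoding example (illustrative): after tieOperands(0, 2) on a normal
+// two-address instruction, the use at index 2 stores TiedTo = 1 (def index
+// plus one) and the def at index 0 stores TiedTo = 3 (use index plus one),
+// while a stored value of 0 still means "not tied".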
+
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isTied() && "Operand isn't tied");
+
+ // Normally TiedTo is in range.
+ if (MO.TiedTo < TiedMax)
+ return MO.TiedTo - 1;
+
+ // Uses on normal instructions can be out of range.
+ if (!isInlineAsm()) {
+ // Normal tied defs must be in the 0..TiedMax-1 range.
+ if (MO.isUse())
+ return TiedMax - 1;
+ // MO is a def. Search for the tied use.
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &UseMO = getOperand(i);
+ if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+ return i;
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
+ // Now deal with inline asm by parsing the operand group descriptor flags.
+ // Find the beginning of each operand group.
+ SmallVector<unsigned, 8> GroupIdx;
+ unsigned OpIdxGroup = ~0u;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+ unsigned CurGroup = GroupIdx.size();
+ GroupIdx.push_back(i);
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ // OpIdx belongs to this operand group.
+ if (OpIdx > i && OpIdx < i + NumOps)
+ OpIdxGroup = CurGroup;
+ unsigned TiedGroup;
+ if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ continue;
+ // Operands in this group are tied to operands in TiedGroup which must be
+ // earlier. Find the number of operands between the two groups.
+ unsigned Delta = i - GroupIdx[TiedGroup];
+
+ // OpIdx is a use tied to TiedGroup.
+ if (OpIdxGroup == CurGroup)
+ return OpIdx - Delta;
+
+ // OpIdx is a def tied to this use group.
+ if (OpIdxGroup == TiedGroup)
+ return OpIdx + Delta;
+ }
+ llvm_unreachable("Invalid tied operand on inline asm");
+}
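+// Illustration of the group walk above, assuming
+// InlineAsm::MIOp_FirstOperand == 2: a one-register regdef group at
+// operands {2,3} and a one-register reguse group at {4,5} tied to it give
+// Delta == 2, so the walk maps operand 5 to 3 and operand 3 to 5.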
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
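+// E.g. a hypothetical call substituteRegister(FromReg, ToReg, X86::sub_8bit,
+// *TRI) rewrites every operand of FromReg to name the sub_8bit piece of
+// ToReg; for a virtual ToReg the index composes with any subregister index
+// already present on the operand.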
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ //
+ // Treat volatile loads as stores. This is not strictly necessary for
+ // volatiles, but it is required for atomic loads. It is not allowed to move
+ // a load across an atomic load with Ordering > Monotonic.
+ if (mayStore() || isCall() ||
+ (mayLoad() && hasOrderedMemoryRef())) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isLabel() || isDebugValue() ||
+ isTerminator() || hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+  // loaded value doesn't change between the load and its intended
+  // destination. The check for isInvariantLoad gives the target the chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, we can't move it.
+ return !SawStore;
+
+ return true;
+}
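+// A sketch of the intended calling pattern (hypothetical sinking loop):
+// SawStore accumulates across the scan, so a load encountered after a store
+// is rejected:
+//   bool SawStore = false;
+//   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+//        I != E; ++I)
+//     if (I->isSafeToMove(TII, AA, SawStore))
+//       Candidates.push_back(I);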
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, AA, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+    // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for ordered references.
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if (!(*I)->isUnordered())
+ return true;
+
+ return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+ if ((*I)->isInvariant()) return true;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
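+// E.g. a load whose only memory operand refers to a constant
+// PseudoSourceValue (such as a constant pool entry) is reported invariant,
+// while a load that has lost its memoperands conservatively is not.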
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
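+// For example, "%vreg2<def> = PHI %vreg1, <BB#0>, %vreg1, <BB#1>" returns
+// %vreg1; any mismatch among the incoming registers returns 0.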
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (hasProperty(MCID::UnmodeledSideEffects))
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MF, MO);
+ }
+}
+
+void MachineInstr::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " " << *this;
+#endif
+}
+
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
+ bool SkipOpers) const {
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
+ }
+
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ if (TM && TM->getInstrInfo())
+ OS << TM->getInstrInfo()->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
+
+ if (SkipOpers)
+ return;
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+ if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+ OS << " [attdialect]";
+ if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+ OS << " [inteldialect]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg)) {
+ bool HasAliasLive = false;
+ for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (!MRI.use_empty(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
+ OS << "pred:";
+ if (MCOI.isOptionalDef())
+ OS << "opt:";
+ }
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ const MDNode *MD = MO.getMetadata();
+ if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
+ OS << "!\"" << MDS->getString() << '\"';
+ else
+ MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag); break;
+ }
+
+ unsigned RCID = 0;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TM)
+ OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
+ else
+ OS << ":RC" << RCID;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ const unsigned PrintableFlags = FrameSetup;
+ if (Flags & PrintableFlags) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+ }
+
+ if (!memoperands_empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ }
+
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << PrintReg(VirtRegs[j]);
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
+ // Print debug location information.
+ if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ DIVariable DV(getOperand(e - 1).getMetadata());
+ OS << " line no:" << DV.getLineNumber();
+ if (MDNode *InlinedAt = DV.getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " inlined @[ ";
+ printDebugLoc(InlinedAtDL, MF, OS);
+ OS << " ]";
+ }
+ }
+ } else if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " dbg:";
+ printDebugLoc(debugLoc, MF, OS);
+ }
+
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
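+// A minimal sketch (hypothetical caller that knows Reg's live range ends at
+// this instruction):
+//   MI->addRegisterKilled(Reg, TRI, /*AddIfNotFound=*/true);
+// This marks an existing use of Reg with a kill flag, or appends an
+// implicit <imp-use,kill> operand when no such use exists.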
+
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = 0;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ MO.setIsKill(false);
+ }
+}
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ bool Dead = true;
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ // Build up a buffer of hash code components.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
+}
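+// Virtual register defs are skipped above so that two instructions computing
+// the same value into different vregs hash (and thus compare) as equal,
+// which is what a CSE map keyed on MachineInstrExpressionTrait wants.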
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 0000000..77bcd1d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,330 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
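+// The pass is registered as "unpack-mi-bundles"; a target that no longer
+// needs bundle boundaries late in its pipeline would typically schedule it
+// from its pass config, e.g. (illustrative) addPass(&UnpackMachineBundlesID).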
+
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
+
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
+
+ SmallVector<unsigned, 32> LocalDefs;
+ SmallSet<unsigned, 32> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 16> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 32> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
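+// A usage sketch, assuming MII and LastMII delimit instructions of MBB that
+// must issue together:
+//   finalizeBundle(MBB, MII, LastMII);
+// A BUNDLE header is prepended carrying implicit defs for the local defs and
+// implicit uses for the external uses collected above.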
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with the 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, llvm::prior(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::VirtRegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ VirtRegInfo RI = { false, false, false };
+ for(; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false};
+
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "analyzePhysReg not given a physical register!");
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ PRI.Clobbers = true; // Regmask clobbers Reg.
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned MOReg = MO.getReg();
+ if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ continue;
+
+ bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+ bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
+
+ if (IsRegOrSuperReg && MO.readsReg()) {
+ // Reg or a super-reg is read, and perhaps killed also.
+ PRI.Reads = true;
+ PRI.Kills = MO.isKill();
+ }
+
+ if (IsRegOrOverlapping && MO.readsReg()) {
+      PRI.ReadsOverlap = true; // Reg or an overlapping register is read.
+ }
+
+ if (!MO.isDef())
+ continue;
+
+ if (IsRegOrSuperReg) {
+ PRI.Defines = true; // Reg or a super-register is defined.
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ if (IsRegOrOverlapping)
+ PRI.Clobbers = true; // Reg or an overlapping reg is defined.
+ }
+
+ if (AllDefsDead && PRI.Defines)
+ PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+
+ return PRI;
+}
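+// E.g. iterating over a single dead def of Reg yields Defines, DefinesDead
+// and Clobbers set, with Reads and ReadsOverlap left false.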
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 0000000..ed3ed4d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1489 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(true), cl::Hidden);
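+// Being a cl::opt, the default can be flipped on the command line when
+// debugging, e.g. (illustrative): llc -avoid-speculation=false foo.ll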
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetLoweringBase *TLI;
+ const TargetRegisterInfo *TRI;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
+ bool PreRegAlloc;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
+
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+    // If an MBB does not dominate the loop's exiting blocks, then it may not
+    // be safe to hoist loads from this block.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() :
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// CandidateInfo - Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+ /// invariants out to the preheader.
+ void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to use only loop-invariant
+    /// operands and is safe to hoist, this function is called to do the
+    /// dirty work.
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+ /// gather register def and frame object update information.
+ void ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates);
+
+ /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+ /// current loop.
+ void AddToLiveIns(unsigned Reg);
+
+ /// IsLICMCandidate - Returns true if the instruction may be a suitable
+    /// candidate for LICM, e.g. if the instruction is a call, then it's
+ /// obviously not safe to hoist it.
+ bool IsLICMCandidate(MachineInstr &I);
+
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// HasLoopPHIUse - Return true if the specified instruction is used by any
+ /// phi node in the current loop.
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
+
+    /// HasHighOperandLatency - Compute the operand latency between a def of
+    /// 'Reg' and a use in the current loop; return true if the target
+    /// considers it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+    /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+    /// if hoisting an instruction with the given register class cost map can
+    /// cause high register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+ /// If not then a load from this mbb may not be safe to hoist.
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+    /// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
+    /// given dominator tree node if it's a leaf or all of its children are
+    /// done. Walk
+ /// up the dominator tree to destroy ancestors which are now done.
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+ /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+ /// blocks dominated by the specified header block, and that are in the
+ /// current loop) in depth first order w.r.t the DominatorTree. This allows
+ /// us to visit definitions before uses, allowing us to hoist a loop body in
+ /// one pass without iteration.
+ ///
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+
+ /// getRegisterClassIDAndCost - For a given MI, register, and the operand
+ /// index, return the ID and cost of its representative register class by
+ /// reference.
+ void getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const;
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
+
+ /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+    /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
+    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+    /// the preheader that computes the same value. If one is found, replace
+    /// all uses of the hoisted instruction with the definition of the existing
+    /// instruction rather than hoisting it to the preheader.
+ bool EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+ /// MayCSE - Return true if the given instruction will be CSE'd if it's
+ /// hoisted out of the loop.
+ bool MayCSE(MachineInstr *MI);
+
+    /// Hoist - When an instruction is found to use only loop-invariant
+    /// operands and is safe to hoist, this function is called to do the dirty
+    /// work. It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ /// InitCSEMap - Initialize the CSE map with instructions that are in the
+ /// current loop preheader that may become duplicates of instructions that
+ /// are hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ Changed = FirstInLoop = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
+ TRI = TM->getRegisterInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getName() << " ********\n");
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ }
+
+ // Get our Loop information...
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
+ ExitBlocks.clear();
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistOutOfLoop(N);
+ CSEMap.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// InstructionStoresToFI - Return true if the instruction stores to the
+/// specified frame index.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end(); o != oe; ++o) {
+ if (!(*o)->isStore() || !(*o)->getValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+    // register, then this is not safe. Multiple defs are indicated by setting
+    // a PhysRegClobbers bit.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ if (PhysRegClobbers.test(*AS))
+        // The register defined by MI is also defined by another instruction
+        // in the loop, so MI cannot be a LICM candidate.
+ RuledOut = true;
+ PhysRegDefs.set(*AS);
+ }
+ }
+
+  // Only consider reloads for now, plus remats which do not have register
+  // operands. FIXME: Consider unfolding load-folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ unsigned NumRegs = TRI->getNumRegs();
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) continue;
+
+    // Conservatively treat live-ins as external defs.
+    // FIXME: That means a reload that's reused in successor block(s) will not
+    // be LICM'ed.
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
+ }
+
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+  // 3. Make sure the candidate's def does not clobber registers read by
+  //    the terminator, and is not itself clobbered by the terminator.
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+ if (Candidates[i].FI != INT_MIN &&
+ StoredFIs.count(Candidates[i].FI))
+ continue;
+
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
+ bool Safe = true;
+ MachineInstr *MI = Candidates[i].MI;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidates[i].Def);
+ }
+ }
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// HoistPostRA - When an instruction is found to use only loop-invariant
+/// operands and is safe to hoist, this function is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+
+  // Now move the instruction to the preheader, inserting it before any
+  // terminator instructions.
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+  // Add the register to the livein list of all the BBs in the current loop,
+  // since a loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+// If not then a load from this mbb may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
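+// The loop header trivially qualifies; any other block must dominate every
+// exiting block. E.g. a block guarded by a conditional inside the loop body
+// fails the dominance test and flips SpeculationState to SpeculateTrue.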
+
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
+
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ BackTrace.pop_back();
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose offspring are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+/// blocks dominated by the specified header block, and that are in the
+/// current loop) in depth first order w.r.t the DominatorTree. This allows
+/// us to visit definitions before uses, allowing us to hoist a loop body in
+/// one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
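+ // ParentMap and OpenChildren let us mirror the recursive DFS scope
+ // entry/exit iteratively: a node's scope is popped (ExitScope) once all
+ // of its dominator tree children have been processed.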
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ do {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ if (Scopes.size() != 0) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ // Compute registers which are live into the loop header.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Now perform LICM.
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ continue;
+
+ EnterScope(MBB);
+
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// getRegisterClassIDAndCost - For a given MI, register, and the operand
+/// index, return the ID and cost of its representative register class.
+void
+MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ MVT VT = *RC->vt_begin();
+ if (VT == MVT::Untyped) {
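+ // Untyped register classes have no representative value type; fall back
+ // to the class's own ID with a unit cost.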
+ RCId = RC->getID();
+ RCCost = 1;
+ } else {
+ RCId = TLI->getRepRegClassFor(VT)->getID();
+ RCCost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// InitRegPressure - Find all virtual register references that are live out
+/// of the preheader to initialize the starting "register pressure". Note this
+/// does not count live-through registers (live-in but not used).
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef())
+ RegPressure[RCId] += RCCost;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += RCCost;
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+ }
+}
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
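+ // Clamp at zero so the unsigned pressure counter cannot wrap around.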
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
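+ // Now account for the defs. Note that getRegisterClassIDAndCost only
+ // consults Reg, so the Idx passed here is not a real operand index.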
+ unsigned Idx = 0;
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
+ RegPressure[RCId] += RCCost;
+ ++Idx;
+ }
+}
+
+/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
+/// loads from the global offset table or the constant pool.
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end(); I != E; ++I) {
+ if (const Value *V = (*I)->getValue()) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
+ }
+ }
+ return false;
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM; e.g., if the instruction is a call, then it's
+/// obviously not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+ return false;
+
+ // If it is a load, then check that it is guaranteed to execute by making
+ // sure it dominates all exiting blocks. If it doesn't, then there is a path
+ // out of the loop which does not execute this load, so we can't hoist it.
+ // Loads from constant memory are not always safe to speculate, for example
+ // an indexed load from a jump table.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
+ return false;
+
+ return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
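+ // The worklist starts with MI itself; in-loop copies of its defs are
+ // pushed below so PHI uses reached through chains of copies are detected.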
+ do {
+ MI = Work.pop_back_val();
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ // A PHI may cause a copy to be inserted.
+ if (UseMI->isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI->getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI->isCopy() && CurLoop->contains(UseMI))
+ Work.push_back(UseMI);
+ }
+ }
+ } while (!Work.empty());
+ return false;
+}
+
+/// HasHighOperandLatency - Compute the operand latency between a def of 'Reg'
+/// and a use in the current loop; return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from the header to the current BB;
+/// check whether hoisting an instruction with the given cost map can cause
+/// high register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+ bool CheapInstr) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ unsigned Limit = RegLimit[RCId];
+ int RCCost = CI->second;
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr)
+ return true;
+
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + RCCost >= Limit)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
+ }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(&MI, AA))
+ return true;
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In a low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favor hoisting long-latency instructions even in a
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
+ }
+ Cost[RCId] += RCCost;
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register-pressure
+ // neutral.
+ Cost[RCId] -= RCCost;
+ }
+ }
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
+
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
+ }
+
+ // High register pressure situation, only hoist if the instruction is going
+ // to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
+ }
+
+ return true;
+}
+
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->canFoldAsLoad())
+ return 0;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isInvariantLoad(AA))
+ return 0;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = MRI->createVirtualRegister(RC);
+
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+ return PrevMI;
+ }
+ return 0;
+}
+
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ SmallVector<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Restore the old register classes if there was more than one def.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// MayCSE - Return true if the given instruction will be CSE'd if it's
+/// hoisted out of the loop.
+bool MachineLICM::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != 0;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+ }
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 0000000..4e2cfdc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,81 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
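+/// getTopBlock - Return the "top" block of the loop: starting from the
+/// header, walk backwards over contiguous in-loop blocks in function layout
+/// order.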
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB != Begin) {
+ MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB == Begin) break;
+ PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ }
+ }
+ return TopMBB;
+}
+
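+/// getBottomBlock - Return the "bottom" block of the loop: starting from the
+/// header, walk forwards over contiguous in-loop blocks in function layout
+/// order.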
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB != prior(End)) {
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == prior(End)) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ }
+ }
+ return BotMBB;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineLoop::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..0ea9ae0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,578 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the pass registration machinery for MachineModuleInfo.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+ MMIAddrLabelMap *Map;
+public:
+ MMIAddrLabelMapCallbackPtr() : Map(0) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *V2);
+};
+
+class MMIAddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// Symbols - The symbols for the label. This is a pointer union that is
+ /// either one symbol (the common case) or a list of symbols.
+ PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// BBCallbacks - Callbacks for the BasicBlocks that we have entries for. We
+ /// use this so we get notified if a block is deleted or RAUWd.
+ std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+ /// whose corresponding BasicBlock got deleted. These symbols need to be
+ /// emitted at some point in the file, so AsmPrinter emits them after the
+ /// function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DeletedAddrLabelsNeedingEmission;
+public:
+
+ MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+
+ // Deallocate any of the 'list of symbols' case.
+ for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+ I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+ if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+ delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+ }
+
+ MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+ std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.isNull()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ if (Entry.Symbols.is<MCSymbol*>())
+ return Entry.Symbols.get<MCSymbol*>();
+ return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.push_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size()-1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Result = Context.CreateTempSymbol();
+ Entry.Symbols = Result;
+ return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ std::vector<MCSymbol*> Result;
+
+ // If we already had an entry for this block, just return it.
+ if (Entry.Symbols.isNull())
+ Result.push_back(getAddrLabelSymbol(BB));
+ else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+ Result.push_back(Sym);
+ else
+ Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+ return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
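+ // Note that the entry is copied out by value before the erase; erasing
+ // would invalidate a reference into the map.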
+ AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = 0; // Clear the callback.
+
+ assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+
+ // Handle both the single and the multiple symbols cases.
+ if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ } else {
+ std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+ MCSymbol *Sym = (*Syms)[i];
+ if (Sym->isDefined()) continue; // Ignore already emitted labels.
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from
+ // 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+
+ // The entry is deleted, free the memory associated with the symbol list.
+ delete Syms;
+ }
+}
+
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.isNull()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = OldEntry; // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = 0; // Update the callback.
+
+ // Otherwise, we need to add the old symbol to the new block's set. If it is
+ // just a single entry, upgrade it to a symbol list.
+ if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+ SymList->push_back(PrevSym);
+ NewEntry.Symbols = SymList;
+ }
+
+ std::vector<MCSymbol*> *SymList =
+ NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ // If the old entry was a single symbol, add it.
+ if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ SymList->push_back(Sym);
+ return;
+ }
+
+ // Otherwise, concatenate the list.
+ std::vector<MCSymbol*> *Syms = OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+ SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+ delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const MCRegisterInfo &MRI,
+ const MCObjectFileInfo *MOFI)
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineModuleInfo::MachineModuleInfo()
+ : ImmutablePass(ID),
+ Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+}
+
+bool MachineModuleInfo::doInitialization(Module &M) {
+
+ ObjFileMMI = 0;
+ CompactUnwindEncoding = 0;
+ CurCallSite = 0;
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ DbgInfoAvailable = UsesVAFloatArgument = false;
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
+ AddrLabelSymbols = 0;
+ TheModule = 0;
+
+ return false;
+}
+
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+ Personalities.clear();
+
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+
+ Context.reset();
+
+ delete ObjFileMMI;
+ ObjFileMMI = 0;
+
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ CompactUnwindEncoding = 0;
+ VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it. This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (AddrLabelSymbols == 0) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding, go ahead and add it at
+ // the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
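+ // Note: i is only incremented when the current landing pad is kept; the
+ // erase cases below re-examine the element shifted into slot i.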
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = 0;
+
+ // Special case: we *should* emit LPs with a null LP MBB. This indicates
+ // the "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j, --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
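+ // Type IDs are 1-based; ID 0 is reserved to mean "cleanup" (see addCleanup).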
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
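+ // Walk backwards from the ends of both the existing filter and the new
+ // one; if the new filter is exhausted first (j == 0), it is a suffix match.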
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
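+ // Filter IDs are negative; -(1 + Offset) encodes the offset at which the
+ // filter starts within FilterIds.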
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return a unique index for the current personality
+/// function. The NULL/first personality function should always get the zero
+/// index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality - use it.
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is
+ // at index zero.
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 0000000..a1c7e9f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+ const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+ const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+ MachineModuleInfoImpl::StubValueTy>&Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..cb204fd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,55 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+void MachinePassRegistryListener::anchor() { }
+
+/// setDefault - Set the default constructor by name.
+void MachinePassRegistry::setDefault(StringRef Name) {
+ MachinePassCtor Ctor = 0;
+ for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+}
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
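+ // Walk the list through a pointer-to-pointer so that unlinking the head
+ // node and unlinking an interior node are handled uniformly.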
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 0000000..c3f6e92
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+// Declare initializeMachinePostDominatorTreePass.
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); // 'true' selects
+ // postdominator computation.
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..1af00e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,360 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
+ : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedRegUnits.resize(TRI.getNumRegUnits());
+ UsedPhysRegMask.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
+ VRegInfo[Reg].first = RC;
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
+ return 0;
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
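+
+// A typical use, as a hedged sketch: before folding Reg into an instruction
+// that needs a more constrained class, a pass narrows the class and bails
+// out when no common subclass exists (RequiredRC is an illustrative name):
+//
+//   if (!MRI->constrainRegClass(Reg, RequiredRC))
+//     return false; // Incompatible classes; leave the code unchanged.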
+
+bool
+MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
+ ++I) {
+ const TargetRegisterClass *OpRC =
+ I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
+ if (unsigned SubIdx = I.getOperand().getSubReg()) {
+ if (OpRC)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx);
+ else
+ NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx);
+ } else if (OpRC)
+ NewRC = TRI->getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+ return Reg;
+}
+
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+#endif
+ VRegInfo.clear();
+}
+
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
+ }
+}
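+
+// The invariants established above could be checked with a walk like this
+// hedged, verify-only sketch (not part of this file):
+//
+//   if (MachineOperand *Head = getRegUseDefListHead(Reg)) {
+//     // Prev links are circular: Head->Prev is the last element, whose
+//     // Next link must be NULL.
+//     assert(!Head->Contents.Reg.Prev->Contents.Reg.Next);
+//     // Defs always precede uses on the list.
+//     bool SeenUse = false;
+//     for (MachineOperand *MO = Head; MO; MO = MO->Contents.Reg.Next) {
+//       assert(!(MO->isDef() && SeenUse));
+//       SeenUse |= !MO->isDef();
+//     }
+//   }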
+
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
+
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
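+
+// The direction test above mirrors memmove: when Dst lies inside
+// [Src, Src+NumOps), a forward copy would overwrite operands before they are
+// read. As a worked example (indices illustrative), moving 3 operands from
+// &Ops[0] to &Ops[1] must copy Ops[2] to Ops[3], then Ops[1] to Ops[2], then
+// Ops[0] to Ops[1].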
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ def_iterator I = def_begin(Reg);
+ assert((I.atEnd() || llvm::next(I) == def_end()) &&
+ "getVRegDef assumes a single definition or no definition");
+ return !I.atEnd() ? &*I : 0;
+}
+
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+ if (def_empty(Reg)) return 0;
+ def_iterator I = def_begin(Reg);
+ if (llvm::next(I) != def_end())
+ return 0;
+ return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+ UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
+/// EmitLiveInCopies - Emit copies into the given entry block to initialize
+/// live-in virtual registers.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (!def_empty(*AI) || isAllocatable(*AI))
+ return false;
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 0000000..bb6aad7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,364 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It is based on the
+// SSAUpdater class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
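+
+// A minimal sketch of the intended client flow, assuming a pass that has
+// introduced new definitions NewVR1/NewVR2 of the original register OldVR
+// (all names illustrative):
+//
+//   MachineSSAUpdater SSAUpdate(MF);
+//   SSAUpdate.Initialize(OldVR);
+//   SSAUpdate.AddAvailableValue(DefBB1, NewVR1);
+//   SSAUpdate.AddAvailableValue(DefBB2, NewVR2);
+//   // Rewrite each remaining use; PHIs are inserted where needed.
+//   SSAUpdate.RewriteUse(UseOperand);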
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->begin();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which
+/// defines a value of the given register class at the start of the specified
+/// basic block. It returns the virtual register defined by the instruction.
+static
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return 0;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also checks if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 0000000..5bd2349
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,2396 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+}
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable load clustering."), cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+MachineSchedContext::MachineSchedContext():
+ MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
+namespace {
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineScheduler();
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory() {}
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ static char ID; // Class identification, replacement for typeinfo
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineFunctionPass(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ if (VerifyScheduling) {
+ DEBUG(LIS->print(dbgs()));
+ MF->verify(this, "Before machine scheduling.");
+ }
+ RegClassInfo->runOnMachineFunction(*MF);
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
+ }
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingInstrs = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingInstrs;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF->getName()
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingInstrs << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingInstrs == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
+ }
+ Scheduler->finalizeSchedule();
+ DEBUG(LIS->print(dbgs()));
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
+ return true;
+}
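+
+// As a worked illustration of the region loop above (instruction names are
+// illustrative): given a block [a, b, CALL, c, d] where CALL is a scheduling
+// boundary, the loop first forms and schedules the bottom region containing
+// c and d, then the region containing a and b. The CALL itself is counted
+// but never moved, so instructions are only reordered within each region.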
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+ dbgs() << " " << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMI::~ScheduleDAGMI() {
+ delete DFSResult;
+ DeleteContainerPointers(Mutations);
+ delete SchedImpl;
+}
+
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Advance RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ ++RegionBegin;
+
+ // Update the instruction stream.
+ BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
+
+ // Recede RegionBegin if an instruction moves above the first.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd =
+ (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top and bottom scheduled
+// regions.
+void ScheduleDAGMI::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.getPressure().dump(TRI));
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ BotRPTracker.recede();
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << "Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
+ if (RegionPressure[i] > Limit)
+ RegionCriticalPSets.push_back(PressureElement(i, 0));
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].PSetID) << " ";
+ dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+ unsigned ID = RegionCriticalPSets[i].PSetID;
+ int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+ if ((int)NewMaxPressure[ID] > MaxUnits)
+ MaxUnits = NewMaxPressure[ID];
+ }
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+ for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+ Mutations[i]->apply(this);
+ }
+}
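+
+// Targets hook into this step by registering mutations when the scheduler is
+// constructed. A hedged sketch of a custom mutation (MyTargetMutation is an
+// illustrative name, not a real class):
+//
+//   struct MyTargetMutation : public ScheduleDAGMutation {
+//     virtual void apply(ScheduleDAGMI *DAG) {
+//       // Inspect DAG->SUnits and add edges via DAG->addEdge(...).
+//     }
+//   };
+//   DAG->addMutation(new MyTargetMutation()); // Freed by ~ScheduleDAGMI.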
+
+void ScheduleDAGMI::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
+ assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU->biasCriticalPath();
+
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (!I->NumPredsLeft)
+ TopRoots.push_back(SU);
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (!I->NumSuccsLeft)
+ BotRoots.push_back(SU);
+ }
+ ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ NextClusterSucc = NULL;
+ NextClusterPred = NULL;
+
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+ SchedImpl->releaseTopNode(*I);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
+
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ SchedImpl->registerRoots();
+
+ // Advance past initial DebugValues.
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ TopRPTracker.setPos(CurrentTop);
+
+ CurrentBottom = RegionEnd;
+}
+
+/// Move an instruction and update register pressure.
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+ }
+}
+
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == llvm::prior(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+ if (SUnit *SU = getSUnit(&(*MI)))
+ SU->dump(this);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// LoadClusterMutation - DAG post-processing to cluster loads.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+ struct LoadInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ unsigned Offset;
+ LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+ };
+ static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : TII(tii), TRI(tri) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+protected:
+ void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+ const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS) {
+ if (LHS.BaseReg != RHS.BaseReg)
+ return LHS.BaseReg < RHS.BaseReg;
+ return LHS.Offset < RHS.Offset;
+}
+
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+ ScheduleDAGMI *DAG) {
+ SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
+ for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+ SUnit *SU = Loads[Idx];
+ unsigned BaseReg;
+ unsigned Offset;
+ if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ }
+ if (LoadRecords.size() < 2)
+ return;
+ std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+ if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = LoadRecords[Idx].SU;
+ SUnit *SUb = LoadRecords[Idx+1].SU;
+ if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+ DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ }
+ else
+ ClusterLength = 1;
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent loads.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->mayLoad())
+ continue;
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==DAG->SUnits.size() for loads at the top of the
+ // schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo *TII;
+public:
+ MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+ // For now, assume targets can only fuse with the branch.
+ MachineInstr *Branch = DAG->ExitSU.getInstr();
+ if (!Branch)
+ return;
+
+ for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+ SUnit *SU = &DAG->SUnits[--Idx];
+ if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+public:
+ /// Represent the type of SchedCandidate found within a single queue.
+ /// pickNodeBidirectional depends on these listed by decreasing priority.
+ enum CandReason {
+ NoCand, SingleExcess, SingleCritical, Cluster,
+ ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+ TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+ NodeOrder};
+
+#ifndef NDEBUG
+ static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+#endif
+
+ /// Policy for scheduling the next instruction in the candidate's zone.
+ struct CandPolicy {
+ bool ReduceLatency;
+ unsigned ReduceResIdx;
+ unsigned DemandResIdx;
+
+ CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+ };
+
+ /// Status of an instruction's critical resource consumption.
+ struct SchedResourceDelta {
+ // Count critical resources in the scheduled region required by SU.
+ unsigned CritResources;
+
+ // Count critical resources from another region consumed by SU.
+ unsigned DemandedResources;
+
+ SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
+
+ bool operator==(const SchedResourceDelta &RHS) const {
+ return CritResources == RHS.CritResources
+ && DemandedResources == RHS.DemandedResources;
+ }
+ bool operator!=(const SchedResourceDelta &RHS) const {
+ return !operator==(RHS);
+ }
+ };
+
+ /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ CandPolicy Policy;
+
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // The reason for this candidate.
+ CandReason Reason;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ // Critical resource consumption of the best candidate.
+ SchedResourceDelta ResDelta;
+
+ SchedCandidate(const CandPolicy &policy)
+ : Policy(policy), SU(NULL), Reason(NoCand) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ RPDelta = Best.RPDelta;
+ ResDelta = Best.ResDelta;
+ }
+
+ void initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel);
+ };
+
+ /// Summarize the unscheduled region.
+ struct SchedRemainder {
+ // Critical path through the DAG in expected latency.
+ unsigned CriticalPath;
+
+ // Unscheduled resources
+ SmallVector<unsigned, 16> RemainingCounts;
+ // Critical resource for the unscheduled zone.
+ unsigned CritResIdx;
+ // Number of micro-ops left to schedule.
+ unsigned RemainingMicroOps;
+
+ void reset() {
+ CriticalPath = 0;
+ RemainingCounts.clear();
+ CritResIdx = 0;
+ RemainingMicroOps = 0;
+ }
+
+ SchedRemainder() { reset(); }
+
+ void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
+
+ unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ return std::max(
+ RemainingMicroOps * SchedModel->getMicroOpFactor(),
+ RemainingCounts[CritResIdx]);
+ }
+ };
+
+ /// Each scheduling boundary is associated with ready queues. It tracks the
+ /// current cycle in the direction of movement, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ SchedRemainder *Rem;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ // For heuristics, keep a list of the nodes that immediately depend on the
+ // most recently scheduled node.
+ SmallPtrSet<const SUnit*, 8> NextSUs;
+
+ ScheduleHazardRecognizer *HazardRec;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // The expected latency of the critical path in this scheduled zone.
+ unsigned ExpectedLatency;
+
+ // Resources used in the scheduled zone beyond this boundary.
+ SmallVector<unsigned, 16> ResourceCounts;
+
+ // Cache the critical resources ID in this scheduled zone.
+ unsigned CritResIdx;
+
+ // Whether the scheduled region is resource limited rather than latency
+ // limited.
+ bool IsResourceLimited;
+
+ unsigned ExpectedCount;
+
+#ifndef NDEBUG
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+#endif
+
+ void reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ delete HazardRec;
+
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ NextSUs.clear();
+ HazardRec = 0;
+ CurrCycle = 0;
+ IssueCount = 0;
+ MinReadyCycle = UINT_MAX;
+ ExpectedLatency = 0;
+ // Reserve a zero-count entry for the invalid CritResIdx.
+ ResourceCounts.resize(1);
+ assert(!ResourceCounts[0] && "nonzero count for bad resource");
+ CritResIdx = 0;
+ IsResourceLimited = false;
+ ExpectedCount = 0;
+#ifndef NDEBUG
+ MaxMinLatency = 0;
+#endif
+ }
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ HazardRec(0) {
+ reset();
+ }
+
+ ~SchedBoundary() { delete HazardRec; }
+
+ void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+ SchedRemainder *rem);
+
+ bool isTop() const {
+ return Available.getID() == ConvergingScheduler::TopQID;
+ }
+
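+ // Latency not yet covered by this zone: the node's height when scheduling
+ // top-down, or its depth plus its own latency when scheduling bottom-up.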
+ unsigned getUnscheduledLatency(SUnit *SU) const {
+ if (isTop())
+ return SU->getHeight();
+ return SU->getDepth() + SU->Latency;
+ }
+
+ unsigned getCriticalCount() const {
+ return ResourceCounts[CritResIdx];
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void setLatencyPolicy(CandPolicy &Policy);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void countResource(unsigned PIdx, unsigned Cycles);
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
+private:
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ const TargetRegisterInfo *TRI;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedRemainder Rem;
+ SchedBoundary Top;
+ SchedBoundary Bot;
+
+public:
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
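+ // With LogMaxQID == 2, the Pending queues get IDs TopQID << 2 == 4 and
+ // BotQID << 2 == 8, keeping all four ReadyQueue IDs distinct.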
+
+ ConvergingScheduler():
+ DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(ScheduleDAGMI *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+ virtual void registerRoots();
+
+protected:
+ void balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand);
+
+ void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand);
+
+ void tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker);
+
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ void pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+
+#ifndef NDEBUG
+ void traceCandidate(const SchedCandidate &Cand);
+#endif
+};
+} // namespace
+
+void ConvergingScheduler::SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (std::vector<SUnit>::iterator
+ I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+ RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ }
+ }
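+ // The counts are normalized by per-resource factors, so pick as critical
+ // the resource whose remaining count exceeds the current choice by at
+ // least one full cycle (the latency factor).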
+ for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ }
+ }
+}
+
+void ConvergingScheduler::SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel())
+ ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+}
+
+void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ const TargetMachine &TM = DAG->MF.getTarget();
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isWeak())
+ continue;
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+void ConvergingScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Does this SU have a hazard within the current instruction group?
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+ return true;
+ }
+ return false;
+}
+
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+ // FIXME: compile time. In all, we visit four queues here; we should only
+ // need to visit the one that was last popped if we cache the result.
+ unsigned RemLatency = 0;
+ for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
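+ // Request latency reduction only when the region is latency-bound: the
+ // remaining latency both pushes past the critical-path window and exceeds
+ // the remaining resource-limited work.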
+ unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ if (RemLatency + ExpectedLatency >= CriticalPathLimit
+ && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
+ Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+ }
+}
+
+void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+ Pending.push(SU);
+ else
+ Available.push(SU);
+
+ // Record this node as an immediate dependent of the scheduled node.
+ NextSUs.insert(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingScheduler::SchedBoundary::bumpCycle() {
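+ // Micro-ops issued beyond the issue width spill over into the next cycle.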
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ unsigned NextCycle = CurrCycle + 1;
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ if (MinReadyCycle > NextCycle) {
+ IssueCount = 0;
+ NextCycle = MinReadyCycle;
+ }
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ }
+ else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
+ DEBUG(dbgs() << " " << Available.getName()
+ << " Cycle: " << CurrCycle << '\n');
+}
+
+/// Add the given processor resource to this scheduled zone.
+void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
+ unsigned Cycles) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
+ << " +(" << Cycles << "x" << Factor
+ << ") / " << SchedModel->getLatencyFactor() << '\n');
+
+ unsigned Count = Factor * Cycles;
+ ResourceCounts[PIdx] += Count;
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Check if this resource exceeds the current critical resource by a full
+ // cycle. If so, it becomes the critical resource.
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getProcResource(PIdx)->Name << " x"
+ << ResourceCounts[PIdx] << '\n');
+ }
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ countResource(PI->ProcResourceIdx, PI->Cycles);
+ }
+ }
+ if (isTop()) {
+ if (SU->getDepth() > ExpectedLatency)
+ ExpectedLatency = SU->getDepth();
+ }
+ else {
+ if (SU->getHeight() > ExpectedLatency)
+ ExpectedLatency = SU->getHeight();
+ }
+
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+ // checkHazard prevents scheduling multiple instructions per cycle that exceed
+ // issue width. However, we commonly reach the maximum. In this case
+ // opportunistically bump the cycle to avoid uselessly checking everything in
+ // the readyQ. Furthermore, a single instruction may produce more than one
+ // cycle's worth of micro-ops.
+ if (IssueCount >= SchedModel->getIssueWidth()) {
+ DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ DEBUG(if (!Pending.empty()) Pending.dump());
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ if (IssueCount > 0) {
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ }
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
+void ConvergingScheduler::balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+ if (!CriticalZone.IsResourceLimited)
+ return;
+ assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
+
+ SchedRemainder *Rem = CriticalZone.Rem;
+
+ // If the critical zone is overconsuming a resource relative to the
+ // remainder, try to reduce it.
+ unsigned RemainingCritCount =
+ Rem->RemainingCounts[CriticalZone.CritResIdx];
+ if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+ << '\n');
+ }
+ // If the other zone is underconsuming a resource relative to the full zone,
+ // try to increase it.
+ unsigned OppositeCount =
+ OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+ if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+ << '\n');
+ }
+}
+
+/// Determine if the scheduled zones exceed resource limits or the critical
+/// path and set each candidate's ReduceLatency/ReduceResIdx policy
+/// accordingly.
+void ConvergingScheduler::checkResourceLimits(
+ ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand) {
+
+ // Set ReduceLatency to true if needed.
+ Bot.setLatencyPolicy(BotCand.Policy);
+ Top.setLatencyPolicy(TopCand.Policy);
+
+ // Handle resource-limited regions.
+ if (Top.IsResourceLimited && Bot.IsResourceLimited
+ && Top.CritResIdx == Bot.CritResIdx) {
+ // If the scheduled critical resource in both zones is no longer the
+ // critical remaining resource, attempt to reduce resource height both ways.
+ if (Top.CritResIdx != Rem.CritResIdx) {
+ TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+ BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+ DEBUG(dbgs() << "Reduce scheduled "
+ << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+ }
+ return;
+ }
+ // Handle latency-limited regions.
+ if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+ // If the total scheduled expected latency exceeds the region's critical
+ // path then reduce latency both ways.
+ //
+ // Just because a zone is not resource limited does not mean it is latency
+ // limited. Unbuffered resources, such as the max micro-op count, may cause
+ // CurrCycle to exceed the expected latency.
+ if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+ && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+ TopCand.Policy.ReduceLatency = true;
+ BotCand.Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ << " + " << Bot.ExpectedLatency << '\n');
+ }
+ return;
+ }
+ // The critical resource is different in each zone, so request balancing.
+
+ // Compute the cost of each zone.
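+ // Cost is the max of scheduled latency (scaled to latency-factor units)
+ // and the critical resource count, making the two directly comparable.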
+ Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+ Top.ExpectedCount = std::max(
+ Top.getCriticalCount(),
+ Top.ExpectedCount * SchedModel->getLatencyFactor());
+ Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+ Bot.ExpectedCount = std::max(
+ Bot.getCriticalCount(),
+ Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+ balanceZones(Top, TopCand, Bot, BotCand);
+ balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Return true if this heuristic determines order.
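+/// If the existing candidate wins the comparison, its Reason is still
+/// upgraded to this Reason when that is the stronger (lower-valued) one.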
+static bool tryLess(int TryVal, int CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
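+///
+/// Heuristics are tried in priority order; the first tryLess/tryGreater call
+/// that returns true decides the comparison and records the deciding
+/// CandReason.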
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+
+ // Always initialize TryCand's RPDelta.
+ TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+ // Avoid exceeding the target's limit.
+ if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+ Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+ return;
+ if (Cand.Reason == SingleExcess)
+ Cand.Reason = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
+ Cand.RPDelta.CriticalMax.UnitIncrease,
+ TryCand, Cand, SingleCritical))
+ return;
+ if (Cand.Reason == SingleCritical)
+ Cand.Reason = MultiPressure;
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+ // Currently, weak edges are for clustering, so we hard-code that reason.
+ // However, deferring the current TryCand will not change Cand's reason.
+ CandReason OrigReason = Cand.Reason;
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Cluster)) {
+ Cand.Reason = OrigReason;
+ return;
+ }
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, TopDepthReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, TopPathReduce))
+ return;
+ }
+ else {
+ if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, BotHeightReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, BotPathReduce))
+ return;
+ }
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
+ Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
+ return;
+ if (Cand.Reason == SingleMax)
+ Cand.Reason = MultiPressure;
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // nice pressure avoidance strategy that also conserves the processor's
+ // register renaming resources and keeps the machine code readable.
+ if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+}
+
+/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
+/// is more desirable than RHS from a scheduling standpoint.
+static bool compareRPDelta(const RegPressureDelta &LHS,
+ const RegPressureDelta &RHS) {
+ // Compare each component of pressure in decreasing order of importance
+ // without checking if any are valid. Invalid PressureElements are assumed to
+ // have UnitIncrease==0, so are neutral.
+
+ // Avoid exceeding the target's limit.
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
+ DEBUG(dbgs() << "RP excess top - bot: "
+ << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
+ return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
+ }
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP critical top - bot: "
+ << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
+ << '\n');
+ return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
+ }
+ // Avoid increasing the max pressure of the entire region.
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP current top - bot: "
+ << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
+ << '\n');
+ return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
+ }
+ return false;
+}
+
+#ifndef NDEBUG
+const char *ConvergingScheduler::getReasonStr(
+ ConvergingScheduler::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case SingleExcess: return "REG-EXCESS";
+ case SingleCritical: return "REG-CRIT ";
+ case Cluster: return "CLUSTER ";
+ case SingleMax: return "REG-MAX ";
+ case MultiPressure: return "REG-MULTI ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+ }
+ llvm_unreachable("Unknown reason!");
+}
+
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
+ PressureElement P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case SingleExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case SingleCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case SingleMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ if (P.isValid())
+ dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
+ << ":" << P.UnitIncrease << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << " " << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ dbgs() << '\n';
+}
+#endif
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ ReadyQueue &Q = Zone.Available;
+
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = *I;
+ tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+ bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
+ << " SU(" << Cand.SU->NodeNum << ") "
+ << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ SchedCandidate TopCand(NoPolicy);
+ checkResourceLimits(TopCand, BotCand);
+
+ // Prefer bottom scheduling when heuristics are silent.
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure, pick it.
+ if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+ }
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate, in node order if all else failed.
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
+ IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
+ }
+ SUnit *SU;
+ do {
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ }
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ }
+ else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
+/// its state based on the current cycle before MachineSchedStrategy does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+ // Register DAG post-processors.
+ if (EnableLoadCluster)
+ DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ if (EnableMacroFusion)
+ DAG->addMutation(new MacroFusion(DAG->TII));
+ return DAG;
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
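+// Usage sketch (assuming the standard -misched cl::opt wiring): passing
+// "-misched=converge" to llc selects this strategy by its registry key.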
+
+//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ const SchedDFSResult *DFSResult;
+ const BitVector *ScheduledTrees;
+ bool MaximizeILP;
+
+ ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+ // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
+ if (MaximizeILP)
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
+ else
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ /// In case all subtrees are eventually connected to a common root through
+ /// data dependence (e.g. reduction), place an upper limit on their size.
+ ///
+ /// FIXME: A subtree limit is generally good, but in the situation commented
+ /// above, where multiple similar subtrees feed a common root, we should
+ /// only split at a point where the resulting subtrees will be balanced.
+ /// (a motivating test case must be found).
+ static const unsigned SubtreeLimit = 16;
+
+ ScheduleDAGMI *DAG;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
+ ReadyQ.clear();
+ }
+
+ virtual void registerRoots() {
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ /// Callback to select the highest priority node from the ready Q.
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
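+ // This strategy schedules bottom-up only (see the assert in schedNode).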
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
+ << *SU->getInstr()
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+ return SU;
+ }
+
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ virtual void scheduleTree(unsigned SubtreeID) {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Callback after a node is scheduled. SchedDFSResult requires bottom-up
+ /// scheduling, so only assert the direction here; the ready queue is
+ /// resorted in scheduleTree() and registerRoots().
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ }
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+ // gives nodes with a higher number higher priority, causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMI.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return false;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ SS << "SU(" << SU->NodeNum << ')';
+ return SS.str();
+ }
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ std::string Str("shape=Mrecord");
+ const SchedDFSResult *DFS =
+ static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 0000000..4dafbe5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,712 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
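+// Hidden command-line option; e.g. -machine-sink-split=false (via llc)
+// disables critical-edge splitting during sinking.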
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *LI;
+ AliasAnalysis *AA;
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ }
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ };
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
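+ // Shallower loop depth sorts first, so callers iterating the sorted list
+ // prefer successors outside (or less deeply inside) loops.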
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+
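+/// PerformTrivialForwardCoalescing - If MI copies a virtual register whose
+/// only non-debug use is this copy, whose def is not itself copy-like, and
+/// whose register class matches the destination's, then rewrite all uses of
+/// the destination to the source and erase the copy.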
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+
+ // Ignore debug uses because debug info doesn't affect the code.
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
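+ // For example (illustrative): if MI copies a physreg that has only one
+ // non-debug use, breaking the edge and sinking MI may in turn allow the
+ // def of that physreg to be sunk.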
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
+ // Avoid breaking a back edge. From == To means a backedge of a single-BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return 0;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+ }
+
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+ SmallVector<MachineInstr *, 2> &DbgValues) {
+ DbgValues.clear();
+ if (!MI->getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+ DbgValues.push_back(DI);
+ }
+}
+
+/// isPostDominatedBy - Return true if A is post dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+ // FIXME - Use real post dominator.
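+ //
+ // This approximation only recognizes the triangle shape below
+ // (illustrative): A branches to B directly and through OtherSuccBlock.
+ //
+ //        A
+ //       / \
+ //      |   OtherSuccBlock
+ //       \ /
+ //        B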
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
+ return false;
+
+ return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI into
+/// SuccToSinkTo.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
+ return false;
+
+ // It is profitable if SuccToSinkTo does not post dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check whether the only uses in the post dominated block are PHI nodes.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // If SuccToSinkTo post dominates MBB, sinking may still be profitable if MI
+ // can be profitably sunk into another block in a later round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding the successor is compile-time expensive, cache results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is the final destination and post dominates the current
+ // block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return NULL;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return NULL;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return NULL;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
+ BreakPHIEdge, LocalUse))
+ return NULL;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = SuccBlock;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return NULL;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
+ }
+ }
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
+
+ // It's not safe to sink instructions to an EH landing pad. Control flow
+ // into a landing pad is implicitly defined.
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, sinking would introduce
+ // computation on a path where it doesn't already exist. We could split the
+ // critical edge, but for now we just punt.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool store = true;
+ if (!MI->isSafeToMove(TII, AA, store)) {
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a loop.
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ BreakPHIEdge = false;
+ }
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunk into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
+ // Determine where to insert. Skip PHI nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Move debug values.
+ for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MI->clearKillInfo();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 0000000..49d8c4e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1290 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST =
+ MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
+ }
+ FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
+ return FBI;
+}
+
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // successor is always computed first.
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is likewise called on the post-ordered blocks.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
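+//
+// For example (illustrative): with a center block inside loop L, a trace may
+// begin at L's header and may descend into a loop nested in L, but it never
+// crosses an edge that exits L.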
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the fewest
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->succ_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null exactly once, when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
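+//
+// For example (illustrative): an instruction whose two operands become ready
+// at depths 2 and 4, through operand latencies 1 and 3, is assigned depth
+// max(2 + 1, 4 + 3) = 7.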
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
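+// For example (illustrative): for %reg1 = PHI %reg2, <BB#0>, %reg3, <BB#1>
+// and Pred == BB#1, only the %reg3 operand produces a dependency.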
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
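+//
+// For example (illustrative): scanning downwards, a live def of a regunit is
+// recorded with its defining instruction and operand; scanning upwards, the
+// entry instead tracks the highest reader of the unit seen so far.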
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
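+///
+/// For example (illustrative): a live-in virtual register with Height 3
+/// whose def above has Depth 5 contributes a cross-block path of
+/// 3 + 5 = 8 cycles.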
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.isUsefulDominator(TBI))
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo &DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.isUsefulDominator(TBI))
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel
+ .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+ /* FindMin = */ false);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
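+// For example (illustrative): a use at height 2 reached through an operand
+// latency of 3 pushes Heights[DefMI] up to at least 5.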
+static bool pushDepHeight(const DataDep &Dep,
+ const MachineInstr *UseMI, unsigned UseHeight,
+ MIHeightMap &Heights,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp, false);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.SchedModel, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ // FIXME: Check cache tags, recompute as needed.
+ computeTrace(MBB);
+ computeInstrDepths(MBB);
+ computeInstrHeights(MBB);
+ return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
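+
+// Illustrative usage sketch (comment only, not part of this patch's logic):
+// a client pass typically picks an ensemble strategy first and then queries
+// per-block traces, e.g.:
+//
+//   MachineTraceMetrics::Ensemble *Ens =
+//       Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+//   MachineTraceMetrics::Trace T = Ens->getTrace(MBB);
+//   unsigned Cycles = T.getCriticalPath();
+//
+// Here `Traces` is assumed to be the pass's MachineTraceMetrics analysis.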
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
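+
+// Worked example for getInstrSlack(): with a 10-cycle critical path, an
+// instruction at Depth 3 and Height 4 has slack 10 - (3 + 4) = 3, i.e. it
+// can be delayed by up to 3 cycles without lengthening the critical path.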
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
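+
+// Example of the computation above: with InstrDepth = 12 and an issue width
+// of 4, the instruction stream alone needs 12 / 4 = 3 cycles, so the
+// returned depth is max(3, PRMax), where PRMax is the processor-resource
+// bound converted to cycles.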
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..4b12300
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1605 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
+
+ const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
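+ // (For example, on X86 adding EAX also pushes AX, AH, and AL, so kill and
+ // def bookkeeping covers every aliasing sub-register.)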
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RV.push_back(*SubRegs);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
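+
+ // Example of how these sets interact: if %vreg7 is defined in BB#0 and
+ // read in BB#2, with BB#1 in between, then after calcRegsPassed() %vreg7
+ // is in BB#0's regsLiveOut and BB#1's vregsPassed, and calcRegsRequired()
+ // additionally puts it in BB#1's vregsRequired. (Illustrative numbering.)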
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ bool isAllocatable(unsigned Reg) {
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBundleAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
+
+ void verifyInlineAsm(const MachineInstr *MI);
+
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
+
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, Banner);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
+}
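+
+// Usage sketch (illustrative only): besides -verify-machineinstrs, the
+// verifier can be scheduled explicitly, e.g. from a hypothetical pass
+// manager `PM`:
+//
+//   PM.add(createMachineVerifierPass("After my pass"));
+//
+// or invoked in place on a function:
+//
+//   MF.verify(this, "After frame lowering");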
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = NULL;
+ LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = 0;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MBBI->isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor", MBBI);
+ if (!InBundle && MBBI->isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor", MBBI);
+
+ // Is this a bundle header?
+ if (!MBBI->isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = MBBI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", MBBI);
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+
+ // Was this the last bundled instruction?
+ InBundle = MBBI->isBundledWithSucc();
+ }
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MFI->back());
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regMasks.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getName() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (const void*)MBB << ')';
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) {
+ report(msg, MF);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) {
+ report(msg, MBB);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = MRI->getReservedRegs();
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
+ regsReserved.set(*SubRegs);
+ }
+ }
+
+ markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
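+
+// For example, with a successor list starting {BB#2, BB#4}, both
+// matchPair(I, BB#2, BB#4) and matchPair(I, BB#4, BB#2) return true; the
+// pair may appear in either order.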
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = 0;
+
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end();
+ LI != LE; ++LI) {
+ unsigned reg = *LI;
+ if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ MBB != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())))
+ report("MBB has more than one landing pad successor", MBB);
+
+ // Call AnalyzeBranch. If it succeeds, there are several more conditions
+ // to check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
+ !TII->isPredicated(getBundleStart(&MBB->back()))) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+ // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) {
+ report("Too few operands on inline asm", MI);
+ return;
+ }
+ if (!MI->getOperand(0).isSymbol())
+ report("Asm string must be an external symbol", MI);
+ if (!MI->getOperand(1).isImm())
+ report("Asm flags must be an immediate", MI);
+ // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+ // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
+ if (!isUInt<5>(MI->getOperand(1).getImm()))
+ report("Unknown asm flags", &MI->getOperand(1), 1);
+
+ assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ unsigned NumOps;
+ for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // There may be implicit ops after the fixed operands.
+ if (!MO.isImm())
+ break;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ }
+
+ if (OpNo > MI->getNumOperands())
+ report("Missing operands in last group", MI);
+
+ // An optional MDNode follows the groups.
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+ ++OpNo;
+
+ // All trailing operands must be implicit registers.
+ for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isReg() || !MO.isImplicit())
+ report("Expected implicit register after groups", &MO, OpNo);
+ }
+}
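+
+// Schematically, a well-formed INLINEASM instruction therefore looks like:
+//
+//   INLINEASM <asm string>, <extra flags imm>,
+//             <group0 flag imm>, <group0 operands...>,
+//             <group1 flag imm>, ..., [metadata], [implicit regs...]
+//
+// (Layout sketch only; actual printing is target- and asm-dependent.)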
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the operand template of INLINEASM instructions.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the ARM back end.
+ if (MO->isReg() &&
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ if (!MO->isReg())
+ report("Tied use must be a register", MO, MONum);
+ else if (!MO->isTied())
+ report("Operand should be tied", MO, MONum);
+ else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+ report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ } else if (MO->isReg() && MO->isTied())
+ report("Explicit operand should not be tied", MO, MONum);
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
+
+ // Verify the consistency of tied operands.
+ if (MO->isTied()) {
+ unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+ const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+ if (!OtherMO.isReg())
+ report("Must be tied to a register", MO, MONum);
+ if (!OtherMO.isTied())
+ report("Missing tie flags on tied operand", MO, MONum);
+ if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+ report("Inconsistent tie links", MO, MONum);
+ if (MONum < MCID.getNumDefs()) {
+ if (OtherIdx < MCID.getNumOperands()) {
+ if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+ report("Explicit def tied to explicit use without tie constraint",
+ MO, MONum);
+ } else {
+ if (!OtherMO.isImplicit())
+ report("Explicit def should be tied to implicit use", MO, MONum);
+ }
+ }
+ }
+
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (!DRC->contains(Reg)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(Reg) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ if (MO->isKill())
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
+ }
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
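+ // For example, a call carries a regmask that preserves only callee-saved
+ // registers, so every live caller-saved physreg is moved to regsDead here
+ // and dropped from regsLive below.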
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
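+
+// Example: if BB#0 defines %vreg5 (placing it in BB#0's regsLiveOut) and
+// the CFG is BB#0 -> BB#1 -> BB#2, the first loop seeds BB#1's vregsPassed
+// with %vreg5 and the worklist then propagates it into BB#2's vregsPassed,
+// assuming neither block kills or redefines it.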
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ seen.clear();
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
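+
+// Example of what this accepts: for
+//
+//   %vreg3<def> = PHI %vreg1, <BB#0>, %vreg2, <BB#1>
+//
+// %vreg1 must be live-out of BB#0, %vreg2 live-out of BB#1, and every CFG
+// predecessor of the PHI's block must supply exactly one operand pair.
+// (Schematic machine-instruction syntax.)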
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+ }
+
+ // Now check liveness info if available
+ calcRegsRequired();
+
+ // Check for killed virtual registers that should be live out.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ if (MInfo.regsKilled.count(*I)) {
+ report("Virtual register killed in block, but needed live out.", MFI);
+ *OS << "Virtual register " << PrintReg(*I)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ continue;
+ }
+
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
+
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
+
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
+
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
+
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
+
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
+
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
+ }
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
+
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
+
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed to have dead PHI-defs.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot only marks basic block boundaries, so no segment may
+ // end there.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
+
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
+ }
+
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
+
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 0000000..48db200
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 0000000..3982612
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,193 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false)
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
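+///
+/// For illustration (hypothetical vreg numbers, not from this change):
+///   %v1 = PHI %a, <BB#0>, %v2, <BB#1>
+///   %v2 = PHI %v1, <BB#2>, %a, <BB#3>
+/// Both PHIs only ever carry the value %a, so SingleValReg becomes %a and the
+/// whole cycle can be replaced by %a.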
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
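+///
+/// For illustration (hypothetical): if %v1 is used only by the PHI defining
+/// %v2 and %v2 is used only by the PHI defining %v1, no value escapes the
+/// cycle and every PHI in it is dead.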
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+ E = MRI->use_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..5584708
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,644 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in the
+    /// function. In particular, we want to count, for each virtual register
+    /// used in a PHI node, the number of PHI uses coming from each
+    /// predecessor BB. This is used later to determine when the vreg is
+    /// killed in the BB.
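+    ///
+    /// For example (hypothetical numbers): if BB#3 feeds %v7 into two PHI
+    /// nodes, VRegPHIUseCount[BBVRegPair(3, %v7)] ends up as 2.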
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
+STATISTIC(NumLowered, "Number of phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+
+ bool Changed = false;
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ // Split critical edges to help the coalescer. This does not yet support
+ // updating LiveIntervals, so we disable it.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, MLI);
+ }
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(MF, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+ I != E; ++I) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(I->first);
+ MF.DeleteMachineInstr(I->first);
+ }
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
+ DE = MRI->def_end(); DI != DE; ++DI)
+ if (!DI->isImplicitDef())
+ return false;
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+ return false;
+ return true;
+}
+
+
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
+///
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumLowered;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+  // Insert a register-to-register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+      // Add information to LiveVariables to know that the incoming value is
+      // killed. Note that because the value is defined in several places
+      // (once for each incoming block), the "def" block and instruction
+      // fields for the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ MachineInstr *NewInstr = prior(AfterPHIsIt);
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+    // Find a safe location to insert the copy; this may be the first
+    // terminator in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ MachineInstr *NewSrcInstr = 0;
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
+
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
+
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+      // instruction, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(NewSrcInstr);
+ LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ SE = opBlock.succ_end(); SI != SE; ++SI) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
+ SrcLI.removeRange(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MPhi);
+ MF.DeleteMachineInstr(MPhi);
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to count, for each virtual register used in a PHI
+/// node, the number of PHI uses coming from each predecessor BB. This is used
+/// later to determine when the vreg is killed in the BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+ BBI->getOperand(i).getReg())];
+}
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+      // both: jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
+ continue;
+ }
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ return Changed;
+}
+
+bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
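+  //
+  // For example (hypothetical): if %v is defined in MBB and only used by a
+  // PHI in a successor block, LV->isLiveOut(%v, *MBB) returns false, while
+  // the LiveInterval for %v is still live at the successor's start slot, so
+  // the two queries here deliberately differ.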
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ return true;
+ }
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 0000000..e1b56e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+// SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
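+//
+// For illustration (hypothetical): on an edge into a landing pad, the
+// predecessor ends with an invoke-style call, so the copy must go after the
+// last def/use of SrcReg in the block but before that call, not at the first
+// terminator.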
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+  // Make sure the copy goes after any phi nodes, however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 0000000..9ac47fb4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..1af65c8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -0,0 +1,746 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault>
+OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<cl::boolOrDefault>
+EnableMachineSched("enable-misched", cl::Hidden,
+ cl::desc("Enable the machine instruction scheduling pass."));
+static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
+ cl::desc("Use strong PHI elimination."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing; that is,
+/// -disable-mypass suppresses the pass, while -disable-mypass=false has no
+/// effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
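+///
+/// For example (hedged sketch): overridePass below wires this up as
+/// applyDisable(TargetID, DisableMachineLICM), so passing -disable-machine-licm
+/// makes applyDisable return 0 and the Machine LICM pass is simply not added.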
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+ if (Override)
+ return 0;
+ return PassID;
+}
+
+/// Allow Pass selection to be overridden by command line options. This supports
+/// flags with ternary conditions. TargetID is passed through by default. The
+/// pass is suppressed when the option is false. When the option is true, the
+/// StandardID is selected if the target provides no default.
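+///
+/// For example (hedged sketch): EnableMachineSched below is applied this way.
+/// With -enable-misched, a target that supplies no override (TargetID == 0)
+/// falls back to the standard MachineSchedulerID; with -enable-misched=false
+/// the pass is suppressed; when the flag is left unset, the target's choice
+/// passes through unchanged.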
+static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
+ AnalysisID StandardID) {
+ switch (Override) {
+ case cl::BOU_UNSET:
+ return TargetID;
+ case cl::BOU_TRUE:
+ if (TargetID)
+ return TargetID;
+ if (StandardID == 0)
+ report_fatal_error("Target cannot enable pass");
+ return StandardID;
+ case cl::BOU_FALSE:
+ return 0;
+ }
+ llvm_unreachable("Invalid command line option state");
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableBlockPlacement);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, DisableEarlyIfConversion);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
+//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+  // that are part of a standard pass pipeline without overriding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ // Temporarily disable experimental passes.
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enableMachineScheduler())
+ disablePass(&MachineSchedulerID);
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ AnalysisID InsertedPassID) {
+ assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
+ std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ Impl->InsertedPasses.push_back(P);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(0) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ AnalysisID TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
+}
+
+AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, AnalysisID>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+/// Add a pass to the PassManager if that pass is supposed to be run. If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass. Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
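+///
+/// For example (hedged sketch): with StartAfter unset and StopAfter set to a
+/// pass ID such as &PrologEpilogCodeInserterID, every pass up to and including
+/// PEI is added to the pipeline, and everything scheduled after it is skipped.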
+void TargetPassConfig::addPass(Pass *P) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (Started && !Stopped)
+ PM->add(P);
+ if (StopAfter == PassID)
+ Stopped = true;
+ if (StartAfter == PassID)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+ AnalysisID TargetID = getPassSubstitution(PassID);
+ AnalysisID FinalID = overridePass(PassID, TargetID);
+ if (FinalID == 0)
+ return FinalID;
+
+ Pass *P = Pass::createPass(FinalID);
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ addPass(P);
+  // Add any passes that are configured to be inserted after pass P.
+ for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+ I != E; ++I) {
+ if ((*I).first == PassID) {
+ assert((*I).second && "Illegal Pass ID!");
+ Pass *NP = Pass::createPass((*I).second);
+ assert(NP && "Pass ID not registered");
+ addPass(NP);
+ }
+ }
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) {
+ if (TM->shouldPrintMachineCode())
+ addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ addPass(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ addPass(createLoopStrengthReducePass());
+ if (PrintLSR)
+ addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ addPass(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on Dwarf for this bit, and the cleanups done apply to
+    // both. Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ addPass(createCodeGenPreparePass(getTargetLowering()));
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ addPass(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel();
+
+ if (PrintISelInput)
+ addPass(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
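+///
+/// For illustration (hypothetical target code, not part of this change):
+///   bool MyTargetPassConfig::addPreSched2() {
+///     addPass(&MyPostRAFixupPassID); // target-specific fixup before sched2
+///     return true;  // ask addMachinePasses to printAndVerify afterwards
+///   }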
+void TargetPassConfig::addMachinePasses() {
+ // Insert a machine instr printer pass after the specified pass.
+ // If -print-machineinstrs specified, print machineinstrs after all passes.
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+ else if (!StringRef(PrintMachineInstrs.getValue())
+ .equals("option-unspecified")) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+ const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+    assert(TPI && IPI && "Pass ID not registered!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc())
+ printAndVerify("After PreRegAlloc passes");
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc())
+ printAndVerify("After PostRegAlloc passes");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ addPass(&PrologEpilogCodeInserterID);
+ printAndVerify("After PrologEpilogCodeInserter");
+
+  // Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(&ExpandPostRAPseudosID);
+ printAndVerify("After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2())
+ printAndVerify("After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostRASchedulerID);
+ printAndVerify("After PostRAScheduler");
+ }
+
+ // GC
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()));
+ }
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ if (addPreEmitPass())
+ printAndVerify("After PreEmit passes");
+}
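+
+// Editor's sketch (illustrative, not part of this commit): how a target
+// typically uses the addPre/Post hooks documented above. MyTargetPassConfig
+// and MyTargetBranchFixupID are hypothetical names.
+//
+//   bool MyTargetPassConfig::addPreEmitPass() {
+//     // Run a hypothetical late branch fixup just before emission.
+//     addPass(&MyTargetBranchFixupID);
+//     return true; // tells addMachinePasses() to print/verify afterwards
+//   }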
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(&EarlyTailDuplicateID))
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+ addPass(&MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(&PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static FunctionPass *useDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ useDefaultRegisterAllocator);
+
+/// -regalloc=... command line option.
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analysis.
+///
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry registers pass IDs instead of function
+/// ptrs, this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ // Initialize the global default.
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
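+
+// Editor's sketch (illustrative, not part of this commit): an out-of-tree
+// allocator plugs into -regalloc via the same RegisterRegAlloc hook used by
+// defaultRegAlloc above; MyRegAllocPass and its factory are hypothetical.
+//
+//   static FunctionPass *createMyRegisterAllocator() {
+//     return new MyRegAllocPass(); // hypothetical FunctionPass subclass
+//   }
+//   static RegisterRegAlloc myRegAlloc("myalloc", "my register allocator",
+//                                      createMyRegisterAllocator);
+//
+// It is then selected with: llc -regalloc=myalloc foo.ll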
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
+
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation");
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&ProcessImplicitDefsID);
+
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(&LiveVariablesID);
+
+ // Add passes that move from transformed SSA into conventional SSA. This is a
+ // "copy coalescing" problem.
+ //
+ if (!EnableStrongPHIElim) {
+ // Edge splitting is smarter with machine loop info.
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
+ }
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
+ addPass(&TwoAddressInstructionPassID);
+
+ if (EnableStrongPHIElim)
+ addPass(&StrongPHIEliminationID);
+
+ addPass(&RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(&MachineSchedulerID))
+ printAndVerify("After Machine Scheduling");
+
+ // Add the selected register allocation pass.
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation, before rewriter");
+
+ // Allow targets to change the register assignments before rewriting.
+ if (addPreRewrite())
+ printAndVerify("After pre-rewrite passes");
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ printAndVerify("After Virtual Register Rewriter");
+
+ // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+ // but eventually, all users of it should probably be moved to addPostRA and
+ // it can go away. Currently, it's the intended place for targets to run
+ // FinalizeMachineBundles, because passes other than MachineScheduling and
+ // RegAlloc itself may not be aware of bundles.
+ if (addFinalizeRegAlloc())
+ printAndVerify("After RegAlloc finalization");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with registers when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (addPass(&BranchFolderPassID))
+ printAndVerify("After BranchFolding");
+
+ // Tail duplication.
+ if (addPass(&TailDuplicateID))
+ printAndVerify("After TailDuplicate");
+
+ // Copy propagation.
+ if (addPass(&MachineCopyPropagationID))
+ printAndVerify("After copy propagation pass");
+}
+
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID);
+ return true;
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ if (addPass(&MachineBlockPlacementID)) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(&MachineBlockPlacementStatsID);
+
+ printAndVerify("After machine block placement.");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 0000000..a7439b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,577 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// As another example, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use the flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+// v1 = bitcast v0
+// v2 = bitcast v1
+// = v2
+// =>
+// v1 = bitcast v0
+// = v0
+//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads into a virtual register and that virtual register has a
+// single use.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+
+/// optimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register and writes a single register without modifying the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of
+/// the result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ if (MRI->hasOneNonDBGUse(SrcReg))
+ // No other uses.
+ return false;
+
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()->
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ // Uses of the source that are in the same BB as uses of the result of the
+ // instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed) {
+ MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
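+
+// Editor's sketch (illustrative, not part of this commit): the rewrite
+// performed by optimizeExtInstr above, in pseudo machine IR. Register
+// numbers and the sub-register index are illustrative.
+//
+//   %reg1025 = <sext> %reg1024        ; coalescable extension
+//   ...
+//   use %reg1024                      ; reachable use of the source
+// becomes
+//   %reg1025 = <sext> %reg1024
+//   ...
+//   %reg1027 = COPY %reg1025:sub_32   ; reuse the extension result's low part
+//   use %reg1027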
+
+/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts a
+/// value across register classes), and the source is defined by another
+/// bitcast instruction B, and the register class of B's source matches the
+/// register class of A's def, then it is legal to replace all uses of A's def
+/// with B's source. e.g.
+///   %vreg0<def> = VMOVSR %vreg1
+///   %vreg3<def> = VMOVRS %vreg0
+/// Replace all uses of vreg3 with vreg1.
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+
+ unsigned Def = 0;
+ unsigned Src = 0;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Def = Reg;
+ else if (Src)
+ // Multiple sources?
+ return false;
+ else
+ Src = Reg;
+ }
+
+ assert(Def && Src && "Malformed bitcast instruction!");
+
+ MachineInstr *DefMI = MRI->getVRegDef(Src);
+ if (!DefMI || !DefMI->isBitcast())
+ return false;
+
+ unsigned SrcSrc = 0;
+ NumDefs = DefMI->getDesc().getNumDefs();
+ NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (SrcSrc)
+ // Multiple sources?
+ return false;
+ SrcSrc = Reg;
+ }
+
+ if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+ return false;
+
+ MRI->replaceRegWith(Def, SrcSrc);
+ MRI->clearKillFlags(SrcSrc);
+ MI->eraseFromParent();
+ ++NumBitcasts;
+ return true;
+}
+
+/// optimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg, SrcReg2;
+ int CmpMask, CmpValue;
+ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads into virtual registers, and the defined
+/// virtual register must have a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// foldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
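+
+// Editor's sketch (illustrative, not part of this commit): the constant
+// folding performed by foldImmediate above, in pseudo machine IR with
+// x86-like opcodes chosen for illustration.
+//
+//   %vreg1 = MOV32ri 42               ; recorded by isMoveImmediate()
+//   %vreg2 = ADD32rr %vreg0, %vreg1
+// becomes, when TII->FoldImmediate succeeds:
+//   %vreg2 = ADD32ri %vreg0, 42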
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
+ if (DisablePeephole)
+ return false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
+ LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
+
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
+ LocalMIs.insert(MI);
+
+ // If there exists an instruction which belongs to the following
+ // categories, we will discard the load candidate.
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ FoldAsLoadDefReg = 0;
+ continue;
+ }
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
+
+ if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
+ if (SeenMoveImm)
+ Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ }
+
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..53fe273
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,776 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/Passes.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
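+
+// Editor's note (illustrative, not part of this commit): these two options
+// can bisect a scheduling problem, e.g.
+//   llc -postra-sched-debugdiv=2 -postra-sched-debugmod=0 foo.ll
+// schedules only every other block (see the NDEBUG block in
+// runOnMachineFunction below).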
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// LiveRegs - a bit vector indexed by register number; a set bit means
+ /// the register is currently live.
+ BitVector LiveRegs;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
+
+ /// startBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void schedule();
+
+ void EmitSchedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// finishBlock - Clean up register live-range state.
+ ///
+ void finishBlock();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
+ private:
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
+ };
+}
+
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
+ LiveRegs(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return false;
+ } else {
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ const TargetSubtargetInfo &ST =
+ Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
+ CriticalPathRCs))
+ return false;
+ }
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getName()
+ << ":BB#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.startBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = llvm::prior(I);
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.finishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::startBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+
+ if (AntiDepBreak != NULL) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, EndIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::finishBlock() {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Start with no live registers.
+ LiveRegs.reset();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ LiveRegs.set(Reg);
+ // Repeat, for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (LiveRegs.test(MO.getReg())) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
+ AllDead = false;
+ }
+ }
+
+ if (AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix the register kill flags, they may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ BitVector killedRegs(TRI->getNumRegs());
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defed but not used in this
+ // instruction are now dead. Clear the register and all of its subregs
+ // from the live set, since the def completely defines them.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ LiveRegs.reset(Reg);
+
+ // Repeat for all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ bool kill = false;
+ if (!killedRegs.test(Reg)) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If subreg is not live, then register is killed if it became
+ // live in this instruction
+ if (kill)
+ kill = !LiveRegs.test(Reg);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.set(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ LiveRegs.set(Reg);
+
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+ // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.scheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = 0;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+}
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 0000000..e4e18c3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,170 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+};
+} // end anonymous namespace
+
+char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
+ return false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse() && MO->readsReg())
+ return false;
+ return true;
+}
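+
+// Editor's sketch (illustrative, not part of this commit): the conversion
+// this check enables, in pseudo machine IR.
+//
+//   %vreg0 = IMPLICIT_DEF
+//   %vreg1 = COPY %vreg0
+// becomes, after processImplicitDef() marks the use <undef> (so the COPY no
+// longer reads any register):
+//   %vreg1 = IMPLICIT_DEF             ; re-queued on the WorkList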
+
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &MO = UI.getOperand();
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
+ continue;
+ DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
+ }
+ MI->eraseFromParent();
+ return;
+ }
+
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned UserReg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ !TRI->regsOverlap(Reg, UserReg))
+ continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO->isUse())
+ MO->setIsUndef();
+ }
+ if (Found)
+ break;
+ }
+
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
+
+ // The using instruction wasn't found; it could be in another block.
+ // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->RemoveOperand(i);
+ DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
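+
+// A hypothetical physreg example: for
+//
+//   %R0<def> = IMPLICIT_DEF
+//   %R2<def> = ADD %R0, %R1
+//
+// the ADD is the first instruction to touch an alias of R0, so its use
+// becomes %R0<undef> and the IMPLICIT_DEF is erased. If no such user exists
+// in the block, the IMPLICIT_DEF is kept with its extra operands trimmed.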
+
+/// runOnMachineFunction - Process IMPLICIT_DEF instructions and turn them
+/// into <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ bool Changed = false;
+
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+ if (MBBI->isImplicitDef())
+ WorkList.insert(MBBI);
+
+ if (WorkList.empty())
+ continue;
+
+ DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..e5872df
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,892 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
+ assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - With shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - Without shrink wrapping (default), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asms need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
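+
+// A sketch of the pseudos scanned for above (opcode names vary by target;
+// on X86 they are ADJCALLSTACKDOWN*/ADJCALLSTACKUP*):
+//
+//   ADJCALLSTACKDOWN 32   ; frame setup, Size = 32
+//   CALL ...
+//   ADJCALLSTACKUP 32, 0  ; frame destroy
+//
+// Seeing this pair sets AdjustsStack and raises MaxCallFrameSize to at
+// least 32.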
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
+ const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+
+ // Get the callee saved register list...
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
+ return;
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (F.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
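+
+// For instance, on a hypothetical target with CSRegs = { R4, R5, R6 } where
+// only R4 and R6 are modified, CSI gets two entries. Each entry then receives
+// a reserved slot, a target-mandated fixed slot, or a fresh stack object
+// whose index widens the [MinCSFrameIndex, MaxCSFrameIndex] range.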
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ if (!ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
+ true,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
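+
+// A note on the reinsertion dance above: loadRegFromStackSlot() may emit
+// several instructions, all inserted before I. Resetting I to just after
+// BeforeI (or to begin()) reliably points it at the start of the sequence
+// just emitted, so the next restore is inserted ahead of it -- restores come
+// out in reverse loop order.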
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
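+
+// A worked example of the rounding above: with Offset = 13 and Align = 8,
+// (13 + 8 - 1) / 8 * 8 = 20 / 8 * 8 = 16 by integer division, i.e. the next
+// 8-byte boundary.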
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of the local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of the last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address of
+ // the object -- which is given by the offset. For a down-growing stack
+ // the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn)) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+ !RegInfo->useFPForScavengingIndex(Fn))) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or allocas, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
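+
+// A small worked layout, assuming a down-growing stack, LocalAreaOffset = 0,
+// FI#0 of 8 bytes align 8, and FI#1 of 4 bytes align 4:
+//
+//   FI#0: Offset = 0 + 8 = 8, already 8-aligned  -> placed at SP[-8]
+//   FI#1: Offset = 8 + 4 = 12, already 4-aligned -> placed at SP[-12]
+//
+// StackSize is then 12, and a final StackAlign of 16 rounds it up to 16.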
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ // Add prologue to the function...
+ TFI.emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().isReturn())
+ TFI.emitEpilogue(Fn, *I);
+ }
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
+ TFI.adjustForSegmentedStacks(Fn);
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ TFI.adjustForHiPEPrologue(Fn);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = llvm::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use the target machine's register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
+ }
+}
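+
+// An illustrative SPAdj walk for a down-growing stack: a frame setup of 16
+// bytes makes SPAdj = 16, so frame indices eliminated between the pair are
+// biased by those 16 bytes; the matching frame destroy brings SPAdj back
+// to 0.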
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = llvm::next(I);
+ MachineBasicBlock::iterator P = I == BB->begin() ?
+ MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert (ScratchReg && "Missing scratch register!");
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setUsed(ScratchReg);
+ }
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != llvm::prior(J)) {
+ BB->splice(J, BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+
+ // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
+ } else
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..87fff9a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,173 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 0000000..8564911
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,132 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+ // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4];
+ sys::Mutex Lock; // Guards FSValues, but not the values inside it.
+ std::map<int, const PseudoSourceValue *> FSValues;
+
+ PSVGlobalsTy() : PSVs() {}
+ ~PSVGlobalsTy() {
+ for (std::map<int, const PseudoSourceValue *>::iterator
+ I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
+ delete I->second;
+ }
+ }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+} // anonymous namespace
+
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &PSVGlobals->PSVs[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &PSVGlobals->PSVs[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &PSVGlobals->PSVs[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &PSVGlobals->PSVs[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context for the time being to
+// squeak by.
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - PSVGlobals->PSVs];
+}
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ PSVGlobalsTy &PG = *PSVGlobals;
+ sys::ScopedLock locked(PG.Lock);
+ const PseudoSourceValue *&V = PG.FSValues[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (this == getStack() ||
+ this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
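+
+// Summarizing the three predicates above for the four singleton values:
+//
+//                 isConstant  isAliased  mayAlias
+//   Stack         false       false      true
+//   GOT           true        false      false
+//   JumpTable     true        false      false
+//   ConstantPool  true        false      false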
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ // Negative frame indices are used for special things that don't
+ // appear in LLVM IR. Non-negative indices may be used for things
+ // like static allocas.
+ if (!MFI)
+ return FI >= 0;
+ // Spill slots should not alias others.
+ return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 0000000..c035590
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,145 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+void RegAllocBase::init(VirtRegMap &vrm,
+ LiveIntervals &lis,
+ LiveRegMatrix &mat) {
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ Matrix = &mat;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
+ }
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ Matrix->invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ Matrix->assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
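+
+// The selectOrSplit() contract, in sketch form: return a PhysReg to assign
+// VirtReg to it; return 0 and append new intervals to SplitVRegs to requeue
+// the pieces of a split; return ~0u only on allocation failure, which the
+// loop above handles by force-assigning the first register in the class so
+// compilation can continue.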
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 0000000..064e40f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,108 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class LiveRegMatrix;
+class Spiller;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
+ RegisterClassInfo RegClassInfo;
+
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+
+ // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progress by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 0000000..0b6dc68
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,293 @@
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
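+
+// Note: std::priority_queue is a max-heap, so ordering by "A->weight <
+// B->weight" makes the allocator pop the live interval with the largest spill
+// weight first. A small hedged illustration, with A, B, C standing for
+// intervals of weight 2.0, 5.0, and 1.0:
+//
+//   std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+//                       CompSpillWeight> Q;
+//   Q.push(A); Q.push(B); Q.push(C);
+//   Q.top(); // yields B, the interval with weight 5.0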
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual void enqueue(LiveInterval *LI) {
+ Queue.push(LI);
+ }
+
+ virtual LiveInterval *dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ // Helper for spilling all live virtual registers currently unified under
+ // PhysReg that interfere with VirtReg. Returns true if spilling was
+ // successful, and appends any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+}
+
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ SmallVector<LiveInterval*, 8> Intfs;
+
+ // Collect interferences assigned to any alias of the physical register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
+ return false;
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ return false;
+ Intfs.push_back(Intf);
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(!Intfs.empty() && "expected interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+ }
+ return true;
+}
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we
+// find an available register. So, the number of interference tests in the
+// worst case is |vregs| * |machineregs|. And since the number of interference
+// tests is minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ while (unsigned PhysReg = Order.next()) {
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
+ return PhysReg;
+
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
+ PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
+ }
+ }
+
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ continue;
+
+ assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
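+
+// A simplified sketch (illustration only) of how the RegAllocBase driver is
+// expected to consume selectOrSplit()'s return value; the real
+// allocatePhysRegs() also handles unspillable failures (~0u) and bookkeeping:
+//
+//   while (LiveInterval *VirtReg = dequeue()) {
+//     SmallVector<LiveInterval*, 4> SplitVRegs;
+//     if (unsigned PhysReg = selectOrSplit(*VirtReg, SplitVRegs))
+//       Matrix->assign(*VirtReg, PhysReg); // Nonzero: make the assignment.
+//     for (unsigned i = 0, e = SplitVRegs.size(); i != e; ++i)
+//       enqueue(SplitVRegs[i]);            // Retry any new intervals.
+//   }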
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << mf.getName() << '\n');
+
+ MF = &mf;
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ releaseMemory();
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 0000000..bb9c05c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1117 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ explicit LiveReg(unsigned v)
+ : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+
+ unsigned getSparseSetIndex() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
+ };
+
+ typedef SparseSet<LiveReg> LiveRegMap;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
+
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+ // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+ // A register state may also be a virtual register number, indicating that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
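+
+ // For illustration, assuming a hypothetical two-register target {R0, R1}:
+ // PhysRegState[R0] == regFree means R0 can be allocated immediately, while
+ // PhysRegState[R1] == 17 means R1 currently holds virtual register 17, and
+ // LiveVirtRegs maps 17 back to R1.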
+
+ // Set of register units.
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // Set of register units that are used in the current instruction, and so
+ // cannot be allocated.
+ UsedInInstrSet UsedInInstr;
+
+ // Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(unsigned PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.insert(*Units);
+ }
+
+ // Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(unsigned PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (UsedInInstr.count(*Units))
+ return true;
+ return false;
+ }
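+
+ // Register units make these checks alias-aware, since overlapping registers
+ // share units. A hedged x86 example: AL and AX share AL's register unit, so
+ // after markRegUsedInInstr(AX), isRegUsedInInstr(AL) also returns true.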
+
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
+
+ enum {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
+ public:
+ virtual const char *getPassName() const {
+ return "Fast Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &Fn);
+ void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineBasicBlock::iterator MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location where Reg belongs.
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+///
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
+}
+
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ }
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
+
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
+}
+
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = LR.LastUse != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
+
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ SmallVector<MachineInstr *, 4> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ }
+ }
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // instructions pointing to this register, because they all point to the
+ // spilled value now.
+ LRIDbgValues.clear();
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+ markRegUsedInInstr(PhysReg);
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ markRegUsedInInstr(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling and clearing out PhysReg and
+// its aliases so the register is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (isRegUsedInInstr(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ return spillImpossible;
+ }
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
+ }
+
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ }
+ return Cost;
+}
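+
+// A worked example, assuming a hypothetical target where AX aliases AL and AH:
+// if AL holds a dirty virtreg (spillDirty = 100) and AH holds a clean one
+// (spillClean = 1), calcSpillCost(AX) sums the aliases and returns 101. If AH
+// were reserved instead, the result would be spillImpossible.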
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
+ }
+ }
+
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
+
+ // First try to find a completely free register.
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
+ if (Cost < BestCost)
+ BestReg = *I, BestCost = Cost;
+ }
+
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
+ }
+
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
+}
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ }
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LRI->Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ bool Dead = MO.isDead();
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || Dead;
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef())
+ MI->addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
+}
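+
+// A hedged x86-style example of the subregister case above: an operand
+// %vreg5:sub_8bit assigned PhysReg = EAX is rewritten to AL (the sub_8bit
+// subreg of EAX) and its subregister index is cleared. A kill of
+// %vreg5:sub_8bit still kills all of EAX, hence the super-register kill.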
+
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ markRegUsedInInstr(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->PhysReg);
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
+ markRegUsedInInstr(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ markRegUsedInInstr(PartialDefs[i]);
+}
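+
+// For illustration: a two-address instruction such as x86's "%dst = ADD %dst,
+// %src" ties the use of %dst to its def, so %dst must get the same physical
+// register at use-time and def-time. handleThroughOperands preallocates such
+// registers so the later physreg def scan cannot steal them.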
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (MRI->isAllocatable(*I))
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+ // Sequentially allocate each instruction in the MBB.
+ while (MII != MBB->end()) {
+ MachineInstr *MI = MII++;
+ const MCInstrDesc &MCID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default: {
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
+ dbgs() << "*";
+ assert(I->PhysReg == Reg && "Bad inverse map");
+ break;
+ }
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ }
+ });
+
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else {
+ // We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ }
+ }
+ LiveDbgValueMap[Reg].push_back(MI);
+ }
+ }
+ // Next instruction.
+ continue;
+ }
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ }
+ }
+
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setRegUnitUsed(*I);
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (MI->isCall()) {
+ // Spill all virtregs before a call. This serves two purposes: 1. If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&MCID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // Cancel coalescing.
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setRegUnitUsed(*I);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
+ RegClassInfo.runOnMachineFunction(Fn);
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
+
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
+ // Initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers.
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 0000000..6d84176
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1791 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "SpillPlacement.h"
+#include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Partition));
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+ };
+
+ static const char *const StageName[];
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = (*Begin)->reg;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
+ };
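+
+ // Illustrating the ordering above: a cost that breaks no hints but evicts
+ // weight 10.0 compares less than (is preferred over) one that breaks a hint
+ // and evicts only weight 0.5, because BrokenHints is compared first.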
+
+ // splitting state.
+ std::auto_ptr<SplitAnalysis> SA;
+ std::auto_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ unsigned PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[i] = C for every live bundle where B[i] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (int i = LiveBundles.find_first(); i >= 0;
+ i = LiveBundles.find_next(i))
+ if (B[i] == NoCand) {
+ B[i] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+ virtual Spiller &spiller() { return *SpillerInstance; }
+ virtual void enqueue(LiveInterval *LI);
+ virtual LiveInterval *dequeue();
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ bool LRE_CanEraseVirtReg(unsigned);
+ void LRE_WillShrinkVirtReg(unsigned);
+ void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
+ bool calcCompactRegion(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Done"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
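+// In practice callers record BestCost = Hysteresis * Cost and later reject
+// candidates with Cost >= BestCost, so a challenger must be roughly 2% better
+// than the incumbent before it replaces it, keeping near-ties stable.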
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LIS->getInterval(VirtReg));
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!ExtraRegInfo.inBounds(Old))
+ return;
+
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned. Reset
+ // the parent to RS_Assign and give the clone the same stage as the parent.
+ ExtraRegInfo[Old].Stage = RS_Assign;
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+ // The queue holds (priority, ~reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_Assign;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated.
+ Prio = Size;
+ } else {
+ // Everything is allocated in long->short order. Long ranges that don't fit
+ // should be spilled (or split) ASAP so they don't create interference.
+ Prio = (1u << 31) + Size;
+
+ // Boost ranges that have a physical register hint.
+ if (VRM->hasKnownPreference(Reg))
+ Prio |= (1u << 30);
+ }
+
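+ // For example, a 100-slot range with a known hint is queued with
+ // Prio = (1u << 31) | (1u << 30) | 100, while a deferred RS_Split range of
+ // the same size gets just Prio = 100. Assuming Queue pops the largest
+ // (Prio, ~Reg) pair first, the complemented register number breaks priority
+ // ties in favor of lower-numbered (older) virtual registers.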
+ Queue.push(std::make_pair(Prio, ~Reg));
+}
+
+LiveInterval *RAGreedy::dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
+ Queue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!Matrix->checkInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint())
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted for a cost below MaxCost.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
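+ // Note that NextCascade is not consumed here: an unassigned range merely
+ // compares as the newest possible cascade, so it may evict anything, and
+ // evictInterference() assigns the real number on first use.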
+
+ EvictionCost Cost;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ // If there are 10 or more interferences, chances are one of them is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ "Only expecting virtual register interference from query");
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent = !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ }
+ }
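+ // All interference was evictable below the limit. Report the cost actually
+ // incurred so the caller can tighten the budget for later candidates.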
+ MaxCost = Cost;
+ return true;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+
+ // Collect all interfering virtregs first.
+ SmallVector<LiveInterval*, 8> Intfs;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval *Intf = Intfs[i];
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg))
+ continue;
+ Matrix->unassign(*Intf);
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
+ unsigned BestPhys = 0;
+ unsigned OrderLimit = Order.getOrder().size();
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+
+ // Check if any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ unsigned MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ << ", no cheaper registers to be found.\n");
+ return 0;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ }
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint())
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in PhysReg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ float StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef;
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+ BC.Entry = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.first() < BI.FirstInstr)
+ BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.first() < BI.LastInstr)
+ ++Ins;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+ BC.Exit = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.last() > BI.LastInstr)
+ BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.last() > BI.FirstInstr)
+ ++Ins;
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ if (Ins)
+ StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias; it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
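+ // BCS batches block constraints and TBS batches links in groups of eight,
+ // so SpillPlacer sees a few larger arrays instead of one call per block.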
+
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ B = 0;
+ }
+ }
+
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
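+ // Fixed-point iteration: each round takes the bundles that most recently
+ // turned positive, adds their peripheral through-blocks as constraints or
+ // links, and lets the spill placer iterate until no new bundles go live.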
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg)
+ addThroughConstraints(Cand.Intf, NewBlocks);
+ else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, 0);
+
+ DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ growRegion(Cand);
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ DEBUG({
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
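+ // Each mismatch between the bundle solution (RegIn/RegOut) and the block's
+ // preference from SplitConstraints costs one copy or spill, charged at this
+ // block's frequency below.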
+ if (Ins)
+ GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
+ GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
+///
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ unsigned Reg = SA->getParent().reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
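+ // An interval index of 0 refers to the remainder (stack) interval, so
+ // IntvIn/IntvOut left at 0 means the value crosses this block boundary on
+ // the stack because the bundle was assigned NoCand.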
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!IntvIn && !IntvOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ continue;
+ }
+
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
+ else
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+ }
+
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned c = 0; c != UsedCands.size(); ++c) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ unsigned Number = Blocks[i];
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = *LREdit.get(i);
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again; spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Spill);
+ continue;
+ }
+
+ // Global intervals. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] < NumGlobalIntvs) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ setStage(Reg, RS_Split2);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ unsigned NumCands = 0;
+ unsigned BestCand = NoCand;
+ float BestCost;
+ SmallVector<unsigned, 8> UsedCands;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = HUGE_VALF;
+ } else {
+ // No benefit from the compact region, our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand || !GlobalCand[i].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles; defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = " << Cost << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
+ }
+ ++NumCands;
+ }
+
+ // No solutions found; fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return 0;
+
+ // Prepare split editor.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+ << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ unsigned Reg = VirtReg.reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling, the new local ranges get to stay RS_New.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &LI = *LREdit.get(i);
+ if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // There is no point to this if there are no larger sub-classes.
+ if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+ return 0;
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+ // Split around every non-copy instruction.
+ for (unsigned i = 0; i != Uses.size(); ++i) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ if (MI->isFullCopy()) {
+ DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ continue;
+ }
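+ // Carve out a tiny interval covering just this instruction. The connecting
+ // copies stay in the remainder interval, which becomes all copies and is
+ // therefore a good candidate for register class inflation.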
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Assign all new registers to RS_Spill. This was the last chance.
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ const unsigned NumGaps = Uses.size()-1;
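+ // GapWeight[i] ends up holding the heaviest interfering spill weight between
+ // Uses[i] and Uses[i+1]; gaps overlapping fixed (reg-unit) interference are
+ // marked HUGE_VALF and can never be used.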
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[*Units].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+
+ // Add fixed interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveInterval &LI = LIS->getRegUnit(*Units);
+ LiveInterval::const_iterator I = LI.find(StartIdx);
+ LiveInterval::const_iterator E = LI.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = HUGE_VALF;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - a phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << Uses[i];
+ dbgs() << '\n';
+ });
+
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
+ bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Remove any gaps with regmask clobbers.
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
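+ // The window over gaps [SplitBefore, SplitAfter) shrinks when the enclosed
+ // interference is too heavy and extends otherwise, keeping the best weight
+ // margin seen across all positions.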
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
+ // Would this split be possible to allocate?
+ // Never allocate all gaps; we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(*LREdit.get(i), RS_Split2);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Ranges must be Split2 or less.
+ if (getStage(VirtReg) >= RS_Spill)
+ return 0;
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ Matrix->invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (getStage(VirtReg) < RS_Split2) {
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ }
+
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
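+// selectOrSplit implements the decision cascade: try a free register, then
+// eviction, then (on the second round) splitting, and finally spilling.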
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Split ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Split)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage < RS_Split) {
+ setStage(VirtReg, RS_Split);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(&VirtReg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Done || !VirtReg.isSpillable())
+ return ~0u;
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
+
+ allocatePhysRegs();
+ releaseMemory();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..607edac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,638 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "RegisterCoalescer.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+pbqpDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
+
+namespace {
+
+/// PBQP-based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::set<unsigned> RegSet;
+
+
+ std::auto_ptr<PBQPBuilder> builder;
+
+ char *customPassID;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+
+ std::auto_ptr<Spiller> spiller;
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ RegSet vregsToAlloc, emptyIntervalVRegs;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+  /// \brief Given a solution to the PBQP problem, maps it back to a register
+  /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+}
+
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
+
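+  // Option 0 is always the spill option; with a hypothetical allowed set
+  // {R0, R1}, option 1 maps to R0 and option 2 to R1, hence the option - 1
+  // index below.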
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
+
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
+ if (mri->def_empty(Reg))
+ continue;
+ pregs.insert(Reg);
+ mri->setPhysRegUsed(Reg);
+ }
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ LiveInterval *vregLI = &LIS->getInterval(vreg);
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps;
+ LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
+ if (mri->isReserved(preg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
+ if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ vrAllowed.push_back(preg);
+ }
+
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
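+    // numeric_limits::min() for a floating-point type is the smallest
+    // positive normalized value, so zero-weight intervals still carry a
+    // tiny nonzero spill cost instead of a free spill option.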
+
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
+
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
+ }
+ }
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
+
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getRegisterInfo());
+
+  // Scan the machine function and add a coalescing cost whenever
+  // CoalescerPair deems a copy coalescable.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
+
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
+
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
+
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
+
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
+
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
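+      // Rough illustration (assuming getSpillWeight grows roughly like
+      // 10^depth): a copy outside any loop gets cBenefit = 0.5 * 1, while
+      // copies in deeper loops are rewarded correspondingly more.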
+
+ if (cp.isPhys()) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
+ continue;
+ }
+
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
+ }
+ }
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
+
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
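+  // Illustration: with hypothetical allowed sets {R0, R1} and {R1, R2},
+  // only the matrix cell where both vregs pick R1 is decreased by the
+  // benefit, rewarding identical assignments.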
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (mri->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *li = &lis->getInterval(Reg);
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
+ }
+ }
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
+ << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ SmallVector<LiveInterval*, 8> newSpills;
+ LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ spiller->spill(LRE);
+
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
+ itr != end; ++itr) {
+ assert(!(*itr)->empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
+ vregsToAlloc.insert((*itr)->reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !LRE.empty();
+ } else {
+ llvm_unreachable("Unknown allocation option.");
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+ itr != end; ++itr) {
+ LiveInterval *li = &lis->getInterval(*itr);
+
+ unsigned physReg = mri->getSimpleHint(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+ spiller.reset(createInlineSpiller(*this, MF, *vrm));
+
+ mri->freezeReservedRegs(MF);
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+  // This process is repeated until no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+#ifndef NDEBUG
+ const Function* func = mf->getFunction();
+ std::string fqn =
+ func->getParent()->getModuleIdentifier() + "." +
+ func->getName().str();
+#endif
+
+  // If there are non-empty intervals, allocate them using pbqp.
+ if (!vregsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+ if (pbqpDumpGraphs) {
+ std::ostringstream rs;
+ rs << round;
+ std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+ std::string tmp;
+ raw_fd_ostream os(graphFileName.c_str(), tmp);
+ DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+ << graphFileName << "\"\n");
+ problem->getGraph().dump(os);
+ }
+#endif
+
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
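+// Usage sketch (assuming the llc driver): the "pbqp" registration above makes
+// this allocator selectable with
+//
+//   llc -regalloc=pbqp foo.ll
+//
+// and the hidden -pbqp-coalescing flag switches in PBQPBuilderWithCoalescing.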
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 0000000..87382d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,146 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee-saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) {
+ TRI = MF->getTarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
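+    // Illustration (hypothetical x86-64 CSR order {RBX, R12}): CSRNum[RBX]
+    // and its alias CSRNum[EBX] become 1, CSRNum[R12] and CSRNum[R12D]
+    // become 2, and registers with no CSR alias stay 0.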
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
+ Update = true;
+ Reserved = RR;
+ }
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first, followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<MCPhysReg, 16> CSRAlias;
+ unsigned MinCost = 0xff;
+ unsigned LastCost = ~0u;
+ unsigned LastCostChange = 0;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ MinCost = std::min(MinCost, Cost);
+
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+  assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+  // CSR aliases go after the volatile registers; preserve the target's order.
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
+ RCI.MinCost = uint8_t(MinCost);
+ RCI.LastCostChange = LastCostChange;
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 0000000..d85646d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,2193 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface, the common
+// interface used by all clients and implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+namespace {
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
+ MachineFunction* MF;
+ MachineRegisterInfo* MRI;
+ const TargetMachine* TM;
+ const TargetRegisterInfo* TRI;
+ const TargetInstrInfo* TII;
+ LiveIntervals *LIS;
+ const MachineLoopInfo* Loops;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
+ /// WorkList - Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
+
+ /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<unsigned, 8> InflateRegs;
+
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs();
+
+ /// LiveRangeEdit callback.
+ void LRE_WillEraseInstruction(MachineInstr *MI);
+
+ /// coalesceLocals - coalesce the LocalWorkList.
+ void coalesceLocals();
+
+ /// joinAllIntervals - join compatible live intervals
+ void joinAllIntervals();
+
+ /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
+
+ /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// true if any progress was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+
+ /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns
+ /// true if the copy was successfully coalesced away. If it is not
+ /// currently possible to coalesce this interval, but it may be possible if
+ /// other things get coalesced, then it returns true by reference in
+ /// 'Again'.
+ bool joinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// joinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool joinIntervals(CoalescerPair &CP);
+
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
+
+    /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge the two destination-reg value numbers into a
+    /// single value number, eliminating a copy.
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// hasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// reMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the definition.
+ bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// canJoinPhys - Return true if a physreg copy should be joined.
+ bool canJoinPhys(const CoalescerPair &CP);
+
+ /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// eliminateUndefCopy - Handle copies of undef values.
+ bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
+} // end anonymous namespace
+
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
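+// For example, a plain copy "%vreg1 = COPY %vreg2:sub32" (hypothetical
+// operands) yields Dst = %vreg1, DstSub = 0, Src = %vreg2, SrcSub = sub32;
+// for SUBREG_TO_REG the destination sub-index is additionally composed with
+// the index immediate in operand 3.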
+
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies. Simple blocks that
+// contain non-branches should also be vacated, but this can be handled by an
+// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ SrcReg = DstReg = 0;
+ SrcIdx = DstIdx = 0;
+ NewRC = 0;
+ Flipped = CrossClass = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ Partial = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ Flipped = true;
+ }
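+  // E.g. "%vreg1 = COPY %RAX" (hypothetical operands) arrives with the
+  // physreg as Src, so Src and Dst are swapped and the pair marked Flipped.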
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = TRI.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // Copies between different sub-registers are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
+ return false;
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
+ return false;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ }
+
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
+ std::swap(Src, Dst);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ }
+
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ SrcReg = Src;
+ DstReg = Dst;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+ std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != SrcReg) {
+ return false;
+ }
+
+ // Now check that Dst matches DstReg.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = TRI.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return DstReg == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return TRI.getSubReg(DstReg, SrcSub) == Dst;
+ } else {
+ // DstReg is virtual.
+ if (DstReg != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::eliminateDeadDefs() {
+ SmallVector<LiveInterval*, 8> NewRegs;
+ LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+}
+
+// Callback from eliminateDeadDefs().
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
+}
+
+/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (BValNo->def != CopyIdx) return false;
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno)
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ DEBUG(dbgs() << " result = " << IntB << '\n');
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
+ if (ALR->end == CopyIdx)
+ LIS->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// hasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
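+      // Together, the two checks below test whether the half-open ranges
+      // [BI->start, BI->end) and [AI->start, AI->end) overlap.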
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+  assert(!CP.isPhys());
+
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || BValNo->def != CopyIdx)
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
+ assert(AValNo && "COPY source not live");
+ if (AValNo->isPHIDef() || AValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ if (!DefMI->isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(IntA.reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+  // If ALR and BLR overlap and BLR's end extends beyond ALR's end, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ UE = MRI->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *TRI);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getRegSlot();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ ErasedInstrs.insert(UseMI);
+ LIS->RemoveMachineInstrFromMaps(UseMI);
+ UseMI->eraseFromParent();
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ if (ValNo->isPHIDef() || ValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ if (!DefMI->isAsCheapAsAMove())
+ return false;
+ if (!TII->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(TII, AA, SawStore))
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (!MRI->constrainRegClass(DstReg, RC))
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
+ MachineInstr *NewMI = prior(MII);
+
+ // The original DefMI may have been a subregister def, but the full register
+ // class of its destination matches the destination of CopyMI, and CopyMI is
+ // either a full register def or is read-undef. Therefore we can clear the
+ // subregister index on the rematerialized instruction.
+ NewMI->getOperand(0).setSubReg(0);
+
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI->getDesc().getNumOperands(),
+ e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg()) {
+      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ NewMI->addOperand(MO);
+ }
+ }
+ }
+
+ LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned Reg = NewMIImplDefs[i];
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
+ LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ CopyMI->eraseFromParent();
+ ErasedInstrs.insert(CopyMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ LIS->shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+
+ return true;
+}
+
+/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef>
+/// values; it only removes local variables. When we have a copy like:
+///
+/// %vreg1 = COPY %vreg2<undef>
+///
+/// We delete the copy and remove the corresponding value number from %vreg1.
+/// Any uses of that value number are marked as <undef>.
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
+ const CoalescerPair &CP) {
+ SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+ LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
+ if (SrcInt->liveAt(Idx))
+ return false;
+ LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
+ if (DstInt->liveAt(Idx))
+ return false;
+
+ // No intervals are live-in to CopyMI - it is undef.
+ if (CP.isFlipped())
+ DstInt = SrcInt;
+ SrcInt = 0;
+
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
+ assert(DeadVNI && "No value defined in DstInt");
+ DstInt->removeValNo(DeadVNI);
+
+ // Find new undef uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
+ I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isDef() || MO.isUndef())
+ continue;
+ MachineInstr *MI = MO.getParent();
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ if (DstInt->liveAt(Idx))
+ continue;
+ MO.setIsUndef(true);
+ DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+ }
+ return true;
+}
+
+/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
+
+ SmallPtrSet<MachineInstr*, 8> Visited;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx)
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *TRI);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
+ /// Always join simple intervals that are defined by a single copy from a
+ /// reserved register. This doesn't increase register pressure, so it is
+ /// always beneficial.
+ if (!MRI->isReserved(CP.getDstReg())) {
+ DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
+ return false;
+ }
+
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ return false;
+}
+
+/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*TRI);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
+ }
+
+ // Eliminate undefs.
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
+ DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return false; // Not coalescable.
+ }
+
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return true;
+ }
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ << '\n');
+ if (!canJoinPhys(CP)) {
+ // Before giving up coalescing, if the source is defined by a trivial
+ // computation, try rematerializing it.
+ if (reMaterializeTrivialDef(CP, CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ DEBUG({
+ dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
+ << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << PrintReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << PrintReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
+ << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ LIS->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!joinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If the source is defined by a trivial computation, try rematerializing
+ // it.
+ if (reMaterializeTrivialDef(CP, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial() && !CP.isPhys()) {
+ if (adjustCopiesBackFrom(CP, CopyMI) ||
+ removeCopyByCommutingDef(CP, CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
+
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
+
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ DEBUG({
+ dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
+ if (!CP.isPhys())
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// Attempt joining with a reserved physreg.
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
+
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of
+ // CP.getDstReg(). The live range of the reserved register will look like a
+ // set of dead defs - we don't properly track the live ranges of reserved
+ // registers.
+
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges looking like dead defs.
+ for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations; we are not adding new values to the
+ // reserved register. Also skip merging the live ranges; the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+// range as well as a mapping from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
+
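+// As a minimal sketch of the lane-mask test behind cases 4 and 5 (purely
+// illustrative; the real masks come from
+// TargetRegisterInfo::getSubRegIndexLaneMask() and the decision is made in
+// JoinVals::analyzeValue() below):
+//
+// static bool clobberIsHarmless(unsigned WriteLanes, unsigned ValidLanes) {
+// // A def that only writes lanes holding no valid value destroys nothing,
+// // so the overlap can be resolved by replacing the clobbered lanes.
+// return (WriteLanes & ValidLanes) == 0;
+// }
+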
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+ LiveInterval &LI;
+
+ // Location of this register in the final joined register.
+ // Either CP.DstIdx or CP.SrcIdx.
+ unsigned SubIdx;
+
+ // Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+ // This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ // Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ // No overlap, simply keep this value.
+ CR_Keep,
+
+ // Merge this value into OtherVNI and erase the defining instruction.
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ // values.
+ CR_Erase,
+
+ // Merge this value into OtherVNI but keep the defining instruction.
+ // This is for the special case where OtherVNI is defined by the same
+ // instruction.
+ CR_Merge,
+
+ // Keep this value, and have it replace OtherVNI where possible. This
+ // complicates value mapping since OtherVNI maps to two different values
+ // before and after this def.
+ // Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ // Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ // Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
+
+ // Per-value info for LI. The lane bit masks are all relative to the final
+ // joined register, so they can be compared directly between SrcReg and
+ // DstReg.
+ struct Val {
+ ConflictResolution Resolution;
+
+ // Lanes written by this def, 0 for unanalyzed values.
+ unsigned WriteLanes;
+
+ // Lanes with defined values in this register. Other lanes are undef and
+ // safe to clobber.
+ unsigned ValidLanes;
+
+ // Value in LI being redefined by this def.
+ VNInfo *RedefVNI;
+
+ // Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI;
+
+ // Is this value an IMPLICIT_DEF that can be erased?
+ //
+ // IMPLICIT_DEF values should only exist at the end of a basic block that
+ // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ // safely erased if they are overlapping a live value in the other live
+ // interval.
+ //
+ // Weird control flow graphs and incomplete PHI handling in
+ // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ // longer live ranges. Such IMPLICIT_DEF values should be treated like
+ // normal values.
+ bool ErasableImplicitDef;
+
+ // True when the live range of this value will be pruned because of an
+ // overlapping CR_Replace value in the other live range.
+ bool Pruned;
+
+ // True once Pruned above has been computed.
+ bool PrunedComputed;
+
+ Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
+
+ bool isAnalyzed() const { return WriteLanes != 0; }
+ };
+
+ // One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+ VNInfo *stripCopies(VNInfo *VNI);
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+ bool taintExtent(unsigned, unsigned, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveInterval &li, unsigned subIdx,
+ SmallVectorImpl<VNInfo*> &newVNInfo,
+ const CoalescerPair &cp,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri)
+ : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+ Indexes(LIS->getSlotIndexes()), TRI(tri),
+ Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+ {}
+
+ /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LI where they would conflict with
+ /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs);
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+ unsigned L = 0;
+ for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+ if (MO->readsReg())
+ Redef = true;
+ }
+ return L;
+}
+
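+// For example (hypothetical ARM-style indices): if SubIdx places LI.reg at
+// qsub1 of the joined register and DefMI writes LI.reg:dsub0, then
+// composeSubRegIndices(qsub1, dsub0) names the joined register's dsub2, and
+// L accumulates the lane mask of that single D lane.
+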
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+ while (!VNI->isPHIDef()) {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ break;
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ break;
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+ if (!LRQ.valueIn())
+ break;
+ VNI = LRQ.valueIn();
+ }
+ return VNI;
+}
+
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = ~0u;
+ return CR_Keep;
+ }
+
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = 0;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+ assert(V.RedefVNI && "Instruction is reading nonexistent value");
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
+
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
+ }
+
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
+
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+// When that happens, treat that IMPLICIT_DEF as a normal value, and don't
+// try to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
+
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ stripCopies(VNI) == stripCopies(V.OtherVNI))
+ return CR_Erase;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM %src<kill>
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // The case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
+
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LI wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ return CR_Impossible;
+
+ // We need to verify that no instructions are reading the clobbered lanes. To
+ // save compile time, we'll only check that locally. Don't allow the tainted
+ // value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+ // being read, for example OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
+
+/// Compute the value assignment for ValNo in LI.
+/// This may be called recursively by analyzeValue(), but never for a ValNo on
+/// the stack.
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
+ << LI.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved:
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Other.Vals[V.OtherVNI->id].Pruned = true;
+ // Fall through.
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+ break;
+ }
+}
+
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+/// 1 %dst = VLOAD <-- Define all lanes in %dst
+/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintedVals.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LI from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+ assert(OtherI != Other.LI.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start
+ << " to " << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes);
+ return true;
+}
+
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+ unsigned Lanes) {
+ if (MI->isDebugValue())
+ return false;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
+ continue;
+ if (!MO->readsReg())
+ continue;
+ if (Lanes & TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+ return true;
+ }
+ return false;
+}
+
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
+ continue;
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LI.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
+
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
+
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+ for (;;) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+ DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ return false;
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
+ }
+
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
+ }
+ return true;
+}
+
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+// %dst = COPY %src
+// %src = COPY %dst <-- This value to be pruned.
+// %dst = COPY %src <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
+
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LI.
+ LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+ << ": " << Other.LI << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LI or Other.LI.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(&LI, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+ << Def << ": " << LI << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
+ }
+ }
+}
+
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+ JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
+
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
+
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
+
+ // All clear, the live ranges can be merged.
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints);
+ RHSVals.pruneValues(LHSVals, EndPoints);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<unsigned, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
+ MRI);
+
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg);
+ MRI->clearKillFlags(RHS.reg);
+
+ if (EndPoints.empty())
+ return true;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices(&LHS, EndPoints);
+ return true;
+}
+
+/// joinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
+namespace {
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the Depth field), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
+}
+
+// Try joining each copy in CurrList.
+// Null out any successful joins.
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
+ bool Progress = false;
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
+ continue;
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
+ continue;
+ }
+ bool Again = false;
+ bool Success = joinCopy(CurrList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ CurrList[i] = 0;
+ }
+ return Progress;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ } else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)0), WorkList.end());
+}
+
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
+void RegisterCoalescer::joinAllIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
+ copyCoalesceInMBB(MBBs[i].MBB);
+ }
+ coalesceLocals();
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ while (copyCoalesceWorkList(WorkList))
+ /* empty */ ;
+}
+
+void RegisterCoalescer::releaseMemory() {
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ Loops = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining)
+ joinAllIntervals();
+
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
+ DEBUG(dump());
+ if (VerifyCoalescing)
+ MF->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the print method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ LIS->print(O, m);
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 0000000..47c3df1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,120 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
+
+namespace llvm {
+
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetRegisterInfo &TRI;
+
+ /// DstReg - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned DstReg;
+
+ /// SrcReg - The virtual register that will be coalesced into DstReg.
+ unsigned SrcReg;
+
+ /// DstIdx - The sub-register index of the old DstReg in the new coalesced
+ /// register.
+ unsigned DstIdx;
+
+ /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
+ /// register.
+ unsigned SrcIdx;
+
+ /// Partial - True when the original copy was a partial subregister copy.
+ bool Partial;
+
+ /// CrossClass - True when both regs are virtual, and NewRC is constrained.
+ bool CrossClass;
+
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
+
+ /// NewRC - The register class of the coalesced register, or NULL if DstReg
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
+ const TargetRegisterClass *NewRC;
+
+ public:
+ CoalescerPair(const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
+ /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+ /// No need to call setRegisters().
+ CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !NewRC; }
+
+ /// isPartial - Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return Flipped; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return DstReg; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return SrcReg; }
+
+ /// getDstIdx - Return the subregister index that DstReg will be coalesced
+ /// into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
+ /// into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
+ };
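+
+ // A typical use, sketched under the assumption that MI is a candidate
+ // copy instruction and TRI the target's register info (illustrative only):
+ //
+ // CoalescerPair CP(*TRI);
+ // if (CP.setRegisters(MI)) {
+ // // MI is coalescable; merge the live interval of CP.getSrcReg()
+ // // into CP.getDstReg(), honoring the CP.getSrcIdx() and
+ // // CP.getDstIdx() sub-register placements.
+ // }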
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 0000000..97f22e1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,793 @@
+//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ std::vector<unsigned> &MaxSetPressure,
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
+ CurrSetPressure[*PSet] += Weight;
+ if (&CurrSetPressure != &MaxSetPressure
+ && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
+ MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
+ }
+ }
+}
+
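+// For example (a hand-written table, not one from a real target): the
+// pressure-set lists provided by TargetRegisterInfo are -1 terminated, so a
+// register belonging to sets 0 and 2 with weight 1 would be applied as:
+//
+// static const int ExamplePSets[] = { 0, 2, -1 };
+// increaseSetPressure(CurrSetPressure, MaxSetPressure, ExamplePSets, 1);
+//
+// CurrSetPressure[0] and CurrSetPressure[2] each grow by 1, and the
+// corresponding MaxSetPressure entries are bumped when exceeded.
+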
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
+ assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSet] -= Weight;
+ }
+}
+
+/// Directly increase pressure only within this RegisterPressure result.
+void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ } else {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
+}
+
+/// Directly decrease pressure only within this RegisterPressure result.
+void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ } else {
+ decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ }
+}
+
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpSetPressure(MaxSetPressure, TRI);
+ dbgs() << "Live In: ";
+ for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << '\n';
+}
+
+void RegPressureTracker::dump() const {
+ dbgs() << "Curr Pressure: ";
+ dumpSetPressure(CurrSetPressure, TRI);
+ P.dump(TRI);
+}
+#endif
+
+/// Increase the current pressure as impacted by these registers and bump
+/// the high water mark if needed.
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ } else {
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
+}
+
+/// Simply decrease the current pressure as impacted by these registers.
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ decreaseSetPressure(CurrSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ } else {
+ decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is greater than the next index, open it. The
+/// SlotIndex of the next top is needed for the pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is at or below the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return &LIS->getInterval(Reg);
+ return LIS->getCachedRegUnit(Reg);
+}
+
+/// Set up the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos)
+{
+ MF = mf;
+ TRI = MF->getTarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+ P.MaxSetPressure = CurrSetPressure;
+
+ LiveRegs.PhysRegs.clear();
+ LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
+ LiveRegs.VirtRegs.clear();
+ LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
+}
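+
+// A minimal bottom-up usage sketch (hypothetical driver code, not part of
+// this file; RegClassInfo and RegionBegin are illustrative names): initialize
+// the tracker at the bottom of a region, then recede to the top while the
+// per-set high water marks accumulate in RP.
+//
+//   IntervalPressure RP;
+//   RegPressureTracker RPTracker(RP);
+//   RPTracker.init(&MF, &RegClassInfo, LIS, MBB, MBB->end());
+//   while (RPTracker.getPos() != RegionBegin && RPTracker.recede())
+//     ;
+//   // RP.MaxSetPressure now holds the max pressure seen per pressure set.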
+
+/// Does this pressure result have a valid top position and live ins?
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs?
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos = CurrPos;
+ while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+ ++IdxPos;
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(IdxPos).getRegSlot();
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
+ P.LiveInRegs.push_back(*I);
+ std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
+ P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
+ P.LiveInRegs.end());
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
+ P.LiveOutRegs.push_back(*I);
+ std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
+ P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
+ P.LiveOutRegs.end());
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
+ "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// \brief Convenient wrapper for checking membership in RegisterOperands.
+static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
+ return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
+}
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+class RegisterOperands {
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+public:
+ SmallVector<unsigned, 8> Uses;
+ SmallVector<unsigned, 8> Defs;
+ SmallVector<unsigned, 8> DeadDefs;
+
+ RegisterOperands(const TargetRegisterInfo *tri,
+ const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+
+ /// Push this operand's register onto the correct vector.
+ void collect(const MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ if (MO.readsReg())
+ pushRegUnits(MO.getReg(), Uses);
+ if (MO.isDef()) {
+ if (MO.isDead())
+ pushRegUnits(MO.getReg(), DeadDefs);
+ else
+ pushRegUnits(MO.getReg(), Defs);
+ }
+ }
+
+protected:
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (containsReg(Regs, Reg))
+ return;
+ Regs.push_back(Reg);
+ }
+ else if (MRI->isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (containsReg(Regs, *Units))
+ continue;
+ Regs.push_back(*Units);
+ }
+ }
+ }
+};
+
+/// Collect physical and virtual register operands.
+static void collectOperands(const MachineInstr *MI,
+ RegisterOperands &RegOpers) {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ RegOpers.collect(*OperI);
+
+ // Remove redundant physreg dead defs.
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+}
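+
+// For example (illustrative operands), "%vreg1<def,dead> = ADD %vreg0,
+// %vreg2" yields Uses = {vreg0, vreg2} and DeadDefs = {vreg1}; a physreg
+// unit appearing both as a live def and a dead def is kept only in Defs by
+// the remove_if above.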
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ if (LiveRegs.insert(Regs[i]))
+ increaseRegPressure(Regs[i]);
+ }
+}
+
+/// Add Reg to the live in set and increase max pressure.
+void RegPressureTracker::discoverLiveIn(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveInRegs, Reg))
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(Reg, TRI, MRI);
+}
+
+/// Add Reg to the live out set and increase max pressure.
+void RegPressureTracker::discoverLiveOut(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveOutRegs, Reg))
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(Reg, TRI, MRI);
+}
+
+/// Recede across the previous instruction.
+bool RegPressureTracker::recede() {
+ // Check for the top of the analyzable region.
+ if (CurrPos == MBB->begin()) {
+ closeRegion();
+ return false;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ if (CurrPos->isDebugValue()) {
+ closeRegion();
+ return false;
+ }
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
+ else
+ discoverLiveOut(Reg);
+ }
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg)) {
+ // Adjust liveouts if LiveIntervals are available.
+ if (RequireIntervals) {
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && !LI->killedAt(SlotIdx))
+ discoverLiveOut(Reg);
+ }
+ increaseRegPressure(Reg);
+ LiveRegs.insert(Reg);
+ }
+ }
+ return true;
+}
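+
+// Illustration (hypothetical vregs) of receding across
+// "%vreg1<def> = COPY %vreg0": if %vreg1 is live below, the def kills its
+// liveness and pressure drops; otherwise %vreg1 is recorded as a live-out.
+// The use of %vreg0 then joins the live set, raising pressure again.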
+
+/// Advance across the current instruction.
+bool RegPressureTracker::advance() {
+ // Check for the bottom of the analyzable region.
+ if (CurrPos == MBB->end()) {
+ closeRegion();
+ return false;
+ }
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = getCurrSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ // Discover live-ins.
+ bool isLive = LiveRegs.contains(Reg);
+ if (!isLive)
+ discoverLiveIn(Reg);
+ // Kill liveness at last uses.
+ bool lastUse = false;
+ if (RequireIntervals) {
+ const LiveInterval *LI = getInterval(Reg);
+ lastUse = LI && LI->killedAt(SlotIdx);
+ }
+ else {
+ // Allocatable physregs are always single-use before register rewriting.
+ lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
+ }
+ if (lastUse && isLive) {
+ LiveRegs.erase(Reg);
+ decreaseRegPressure(Reg);
+ }
+ else if (!lastUse && !isLive)
+ increaseRegPressure(Reg);
+ }
+
+ // Generate liveness for defs.
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
+ }
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Find the next instruction.
+ do
+ ++CurrPos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ return true;
+}
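+
+// Top-down counterpart of recede(); a hypothetical forward walk over a
+// region (RegionBegin/RegionEnd are illustrative bounds):
+//
+//   RPTracker.init(&MF, &RegClassInfo, LIS, MBB, RegionBegin);
+//   while (RPTracker.getPos() != RegionEnd && RPTracker.advance())
+//     ;  // CurrSetPressure now reflects pressure at the new position.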
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const TargetRegisterInfo *TRI) {
+ int ExcessUnits = 0;
+ unsigned PSetID = ~0U;
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ }
+ else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (std::abs(PDiff) > std::abs(ExcessUnits)) {
+ ExcessUnits = PDiff;
+ PSetID = i;
+ }
+ }
+ Delta.Excess.PSetID = PSetID;
+ Delta.Excess.UnitIncrease = ExcessUnits;
+}
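+
+// Worked example (illustrative numbers): with Limit=10, POld=9, PNew=12 the
+// recorded delta is PNew - Limit = 2 (only the excess beyond the limit
+// counts, not the raw diff of 3); with POld=12, PNew=9 it is
+// Limit - POld = -2, meaning the limit was just obeyed.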
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureElement();
+ Delta.CurrentMax = PressureElement();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (PDiff > Delta.CriticalMax.UnitIncrease) {
+ Delta.CriticalMax.PSetID = i;
+ Delta.CriticalMax.UnitIncrease = PDiff;
+ }
+ }
+
+ // Find the greatest increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ int MDiff = (int)PNew - (int)MaxPressureLimit[i];
+ if (MDiff > Delta.CurrentMax.UnitIncrease) {
+ Delta.CurrentMax.PSetID = i;
+ Delta.CurrentMax.UnitIncrease = PNew;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
+
+  // Boost max pressure for all dead defs together. Since CurrSetPressure and
+  // MaxSetPressure are updated as a pair, the transient increase is recorded
+  // in MaxSetPressure before the paired decrease restores CurrSetPressure.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
+ }
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg))
+ increaseRegPressure(Reg);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// FIXME: This is expensive for an on-the-fly query. We need to cache the
+/// result per-SUnit with enough information to adjust for the current
+/// scheduling position. But this works as a proof of concept.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
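+
+// Speculative-query sketch (hypothetical scheduling heuristic): compare a
+// candidate's pressure impact without moving the tracker.
+//
+//   RegPressureDelta Delta;
+//   RPTracker.getMaxUpwardPressureDelta(SU->getInstr(), Delta,
+//                                       CriticalPSets, MaxPressureLimit);
+//   if (Delta.Excess.UnitIncrease > 0) {
+//     // Scheduling this instruction here would push set Delta.Excess.PSetID
+//     // beyond its limit; prefer another candidate.
+//   }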
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+static bool findUseBetween(unsigned Reg,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
+ }
+ return false;
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
+
+ // Kill liveness at last uses. Assume allocatable physregs are single-use
+ // rather than checking LiveIntervals.
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (RequireIntervals) {
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && LI->killedAt(SlotIdx)
+ && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ decreaseRegPressure(Reg);
+ }
+ }
+ else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Allocatable physregs are always single-use before register rewriting.
+ decreaseRegPressure(Reg);
+ }
+ }
+
+ // Generate liveness for defs.
+ increaseRegPressure(RegOpers.Defs);
+
+ // Boost pressure for all dead defs together.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the number of
+/// excess register units of that pressure set introduced by this instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..07ace7a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,443 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg) {
+ RegsAvailable.reset(Reg);
+
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RegsAvailable.reset(*SubRegs);
+}
+
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (isUsed(*AI, *AI == Reg))
+ return true;
+ return false;
+}
+
+void RegScavenger::initRegState() {
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ // All registers started out unused.
+ RegsAvailable.set();
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
+ // Self-initialize.
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+ KillRegs.resize(NumPhysRegs);
+ DefRegs.resize(NumPhysRegs);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ BV.set(*SubRegs);
+}
+
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ MachineInstr *MI = MBBI;
+ assert(!MI->isDebugValue() && "Debug values have no kills or defs");
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly ignores "dead" markers.
+ bool isPred = TII->isPredicated(MI);
+ KillRegs.reset();
+ DefRegs.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ if (!isPred && MO.isKill())
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (!isPred && MO.isDead())
+ addRegWithSubRegs(KillRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ }
+ }
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr *MI = MBBI;
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(NULL);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Verify uses and defs.
+#ifndef NDEBUG
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isUsed(Reg)) {
+ // Check if it's partial live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is using a partially undef value. This is not *incorrect* since
+        // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isUsed(*SubRegs)) {
+ SubUsed = true;
+ break;
+ }
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
+ (void)SubUsed;
+ }
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+#endif // NDEBUG
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUsed(DefRegs);
+}
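+
+// Intended driving pattern (hypothetical pass code): the scavenger tracks
+// register state in lock step with a forward walk over the block.
+//
+//   RS->enterBasicBlock(&MBB);
+//   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+//     RS->forward(I);  // advance the tracked state through I
+//     // RS->isUsed(Reg) etc. now reflect liveness after I.
+//   }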
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
+ if (includeReserved)
+ used |= MRI->getReservedRegs();
+ else
+ used.reset(MRI->getReservedRegs());
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
+ return *I;
+ }
+ return 0;
+}
+
+/// getRegsAvailable - Return all available registers in the register class
+/// in Mask.
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+ return Mask;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StartMI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert (RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+ unsigned i = 0;
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
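+// Typical use (a hypothetical sketch of a target's frame-index elimination;
+// MyScratchRC, MyLoadImmOpc and Offset are illustrative names):
+//
+//   unsigned Scratch = RS->scavengeRegister(&MyScratchRC, II, SPAdj);
+//   BuildMI(MBB, II, DL, TII->get(MyLoadImmOpc), Scratch).addImm(Offset);
+//   // ... use Scratch to materialize the out-of-range offset ...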
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+    // We need to scavenge a register but have no spill slot; the target
+    // must know how to save/restore it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
+
+ // Avoid infinite regress
+ Scavenged[SI].Reg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(Scavenged[SI].FrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ MachineBasicBlock::iterator II = prior(I);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ II = prior(UseMI);
+
+ FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+ }
+
+ Scavenged[SI].Restore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // Scavenged[SI].Reg = SReg;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..07e5b47
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,642 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+void SchedulingPriorityQueue::anchor() { }
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ MF(mf), MRI(mf.getRegInfo()),
+ EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+bool SUnit::addPred(const SDep &D, bool Required) {
+  // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
+ return false;
+ }
+ }
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
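+
+// Example (hypothetical scheduler code; Topo, PredSU and SuccSU are
+// illustrative): add an artificial ordering edge only when it cannot
+// introduce a cycle.
+//
+//   if (!Topo.WillCreateCycle(SuccSU, PredSU)) {
+//     SuccSU->addPred(SDep(PredSU, SDep::Artificial));
+//     Topo.AddPred(SuccSU, PredSU);
+//   }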
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator
+ I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+    for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
+///
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ assert(!AnyNotSched);
+ return SUnits.size() - DeadNodes;
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (PredSU->NodeNum < DAGSize && !--Node2Index[PredSU->NodeNum])
+        // If all dependencies of the node are processed already,
+        // then the node can be computed now.
+        WorkList.push_back(PredSU);
+    }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
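+
+// Illustrative example: for a DAG with edges A->B, A->C, B->D, C->D, one
+// valid ordering is ord(A)=0, ord(B)=1, ord(C)=2, ord(D)=3. Inserting the
+// edge D->A would require ord(D) < ord(A); the DFS from A over the affected
+// index range finds D sitting at the upper bound and reports a loop.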
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an
+/// edge to be removed from the specified node N, a predecessor of the
+/// current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
+ return true;
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(SU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 0000000..71e7a21
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,1322 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+    cl::desc("Enable use of AA during MI DAG construction"));
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+ DbgValues.clear();
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant, a multiplied value, or a phi, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
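+
+// Example of the pattern this looks through (illustrative LLVM IR):
+//
+//   %i = ptrtoint i8* %base to i64
+//   %a = add i64 %i, 16
+//   %p = inttoptr i64 %a to i8*
+//
+// Starting from %p's integer operand, the walk steps through the add and
+// stops at the ptrtoint, whose pointer operand %base is the underlying
+// object.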
+
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects) {
+ SmallPtrSet<const Value*, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
+ do {
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V))
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
+}
+
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
+/// information and it can be tracked to normal references to known
+/// objects, add those objects (each with a may-alias flag) to Objects.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ bool MayAlias = true;
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+
+ if (PSV->isAliased(MFI)) {
+ Objects.clear();
+ return;
+ }
+
+ MayAlias = PSV->mayAlias(MFI);
+ } else if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
+
+ Objects.push_back(std::make_pair(V, MayAlias));
+ }
+}
+
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
+}
+
+void ScheduleDAGInstrs::finishBlock() {
+ // Subclasses should no longer refer to the old block.
+ BB = 0;
+}
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ assert(bb == BB && "startBlock should set BB");
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register def and use lists. This ensures that
+/// instructions which define registers that are either used by the
+/// terminator or are live-out are properly scheduled. This is especially
+/// important when the definition latency of the return value(s) is too high
+/// to be hidden by the branch or when the live-out registers are used by
+/// instructions in the fallthrough block.
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->isCall() || ExitMI->isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // the instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ else {
+ assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
+ }
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ assert(Uses.empty() && "Uses in set before adding deps?");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (!Uses.contains(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ }
+ }
+}
+
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
+ if (UseSU == SU)
+ continue;
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
+ int UseOp = I->OpIdx;
+ MachineInstr *RegUse = 0;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ Dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
+ }
+ Dep.setLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/false));
+
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+ }
+}
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend on the physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
+ else {
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ }
+ else {
+ addPhysRegDataDeps(SU, OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // clear this register's use list
+ if (Uses.contains(Reg))
+ Uses.eraseAll(Reg);
+
+ if (!MO.isDead()) {
+ Defs.eraseAll(Reg);
+ } else if (SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
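+ // For example (hypothetical DefList, illustration only): with entries
+ // [def, callA, callB] recorded for Reg, the walk below erases callB and
+ // callA, stops at def, and the current call SU is then pushed, so at
+ // most one trailing call remains.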
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
+ }
+
+ // Defs are pushed in the order they are visited and never reordered.
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ }
+}
+
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Singly defined vregs do not have output/anti dependencies.
+ // The current operand is a def, so we have at least one.
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
+ return;
+
+ // Add output dependence to the next nearest def of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
+ if (DefI == VRegDefs.end())
+ VRegDefs.insert(VReg2SUnit(Reg, SU));
+ else {
+ SUnit *DefSU = DefI->SU;
+ if (DefSU != SU && DefSU != &ExitSU) {
+ SDep Dep(SU, SDep::Output, Reg);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
+ }
+ DefI->SU = SU;
+ }
+}
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Lookup this operand's reaching definition.
+ assert(LIS && "vreg dependencies requires LiveIntervals");
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ VNInfo *VNI = LRQ.valueIn();
+
+ // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ assert(VNI && "No value to read by operand");
+ MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+ // Phis and other noninstructions (after coalescing) have a NULL Def.
+ if (Def) {
+ SUnit *DefSU = getSUnit(Def);
+ if (DefSU) {
+ // The reaching Def lives within this scheduling region.
+ // Create a data dependence.
+ SDep dep(DefSU, SDep::Data, Reg);
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ int DefOp = Def->findRegisterDefOperandIdx(Reg);
+ dep.setLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+ dep.setMinLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+ SU->addPred(dep);
+ }
+ }
+
+ // Add antidependence to the following def of the vreg it uses.
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
+ if (DefI != VRegDefs.end() && DefI->SU != SU)
+ DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
+}
+
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
+ return true;
+ return false;
+}
+
+// This MI might have either incomplete info, or be known to be unsafe
+// to deal with (e.g. a volatile object).
+static inline bool isUnsafeMemoryObject(MachineInstr *MI,
+ const MachineFrameInfo *MFI) {
+ if (!MI || MI->memoperands_empty())
+ return true;
+ // We purposefully do not check for hasOneMemOperand() here,
+ // in the hope of triggering an assert downstream so that the
+ // multi-operand case eventually gets implemented.
+ if ((*MI->memoperands_begin())->isVolatile() ||
+ MI->hasUnmodeledSideEffects())
+ return true;
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return true;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+ IE = Objs.end(); I != IE; ++I) {
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // As in getUnderlyingObjectsForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
+ return true;
+ }
+
+ return false;
+}
+
+/// This returns true if the two MIs need a chain edge between them.
+/// Even if these are not memory operations, we still may need
+/// chain deps between them. The real question is: could these two
+/// MIs be reordered during scheduling from a memory dependency
+/// point of view?
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ MachineInstr *MIa,
+ MachineInstr *MIb) {
+ // Cover a trivial case - no edge is needed from an instruction to itself.
+ if (MIa == MIb)
+ return false;
+
+ if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ return true;
+
+ // If we are dealing with two "normal" loads, we do not need an edge
+ // between them - they could be reordered.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
+ // Up to this point the analysis is generic. From here on we do need AA.
+ if (!AA)
+ return true;
+
+ MachineMemOperand *MMOa = *MIa->memoperands_begin();
+ MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ llvm_unreachable("Multiple memory operands.");
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offsets with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineMemOperand offsets can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
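+ // Worked example (hypothetical operands, illustration only): if MMOa has
+ // Offset=8, Size=4 and MMOb has Offset=16, Size=8, then MinOffset=8,
+ // Overlapa = 4 + 8 - 8 = 4 and Overlapb = 8 + 16 - 8 = 16. Each location
+ // is extended from the smaller of the two offsets to the end of its own
+ // access, which keeps the alias query below conservative.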
+
+ AliasAnalysis::AliasResult AAResult = AA->alias(
+ AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+ MMOa->getTBAAInfo()),
+ AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+ MMOb->getTBAAInfo()));
+
+ return (AAResult != AliasAnalysis::NoAlias);
+}
+
+/// This recursive function iterates over chain deps of SUb looking for
+/// the "latest" node that needs a chain edge to SUa.
+static unsigned
+iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSet<const SUnit*, 16> &Visited) {
+ if (!SUa || !SUb || SUb == ExitSU)
+ return *Depth;
+
+ // Remember visited nodes.
+ if (!Visited.insert(SUb))
+ return *Depth;
+ // If there is _some_ dependency already in place, do not
+ // descend any further.
+ // TODO: Need to make sure that if that dependency got eliminated or ignored
+ // for any reason in the future, we would not violate DAG topology.
+ // Currently it does not happen, but this makes an implicit assumption
+ // about the future implementation.
+ //
+ // Independently, if we encounter a node that is some sort of global
+ // object (like a call) we already have a full set of dependencies to it
+ // and we can stop descending.
+ if (SUa->isSucc(SUb) ||
+ isGlobalMemoryObject(AA, SUb->getInstr()))
+ return *Depth;
+
+ // If we do need an edge, or we have exceeded the depth budget,
+ // add that edge to the predecessor chain of SUb,
+ // and stop descending.
+ if (*Depth > 200 ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+ return *Depth;
+ }
+ // Track current depth.
+ (*Depth)++;
+ // Iterate over chain dependencies only.
+ for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ return *Depth;
+}
+
+/// This function assumes that the tails/leaves of the already
+/// constructed DAG lie "downward" from SU. It iterates downward and
+/// checks whether SU may alias any node it dominates.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ unsigned LatencyToLoad) {
+ if (!SU)
+ return;
+
+ SmallPtrSet<const SUnit*, 16> Visited;
+ unsigned Depth = 0;
+
+ for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+ I != IE; ++I) {
+ if (SU == *I)
+ continue;
+ if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ SDep Dep(SU, SDep::MayAliasMem);
+ Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+ (*I)->addPred(Dep);
+ }
+ // Now go through all the chain successors and iterate from them.
+ // Keep track of visited nodes.
+ for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+ JE = (*I)->Succs.end(); J != JE; ++J)
+ if (J->isCtrl())
+ iterateChainSucc (AA, MFI, SU, J->getSUnit(),
+ ExitSU, &Depth, Visited);
+ }
+}
+
+/// Check whether two objects need a chain edge; if so, add it,
+/// otherwise remember the rejected SU.
+static inline
+void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb,
+ std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
+ // If this is a false dependency,
+ // do not add the edge, but remember the rejected node.
+ if (!EnableAASchedMI ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ SUb->addPred(Dep);
+ }
+ else {
+ // Duplicate entries should be ignored.
+ RejectList.insert(SUb);
+ DEBUG(dbgs() << "\tReject chain dep between SU("
+ << SUa->NodeNum << ") and SU("
+ << SUb->NodeNum << ")\n");
+ }
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down
+/// topological order. The instruction order A < B implies that no edge
+/// exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
+ SUnits.reserve(BB->size());
+
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(MI);
+ MISUnitMap[MI] = SU;
+
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+ }
+}
+
+/// If RPTracker is non-null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+ RegPressureTracker *RPTracker) {
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
+
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ std::set<SUnit*> RejectMemNodes;
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
+
+ assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ // FIXME: Allow SparseSet to reserve space for the creation of virtual
+ // registers during scheduling. Don't artificially inflate the Universe
+ // because we want to assert that vregs are not created during DAG building.
+ VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ addSchedBarrierDeps();
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *DbgMI = NULL;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ if (MI && DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, MI));
+ DbgMI = NULL;
+ }
+
+ if (MI->isDebugValue()) {
+ DbgMI = MI;
+ continue;
+ }
+ if (RPTracker) {
+ RPTracker->recede();
+ assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+ }
+
+ assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
+ "Cannot schedule terminators or labels!");
+
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
+
+ // Add register-based dependencies (data, anti, and output).
+ bool HasVRegDef = false;
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ assert(!IsPostRA && "Virtual register encountered!");
+ if (MO.isDef()) {
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
+ }
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
+ }
+ }
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1
+ && (HasVRegDef || MI->mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
+
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass).
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
+ if (isGlobalMemoryObject(AA, MI)) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (MapVector<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Barrier));
+ }
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ SDep Dep(SU, SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ I->second[i]->addPred(Dep);
+ }
+ }
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ BarrierChain = SU;
+ // This is a barrier event that acts as a pivotal node in the DAG,
+ // so it is safe to clear the list of exposed nodes.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ RejectMemNodes.clear();
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references though SU.
+ if (AliasChain) {
+ unsigned ChainLatency = 0;
+ if (AliasChain->getInstr()->mayLoad())
+ ChainLatency = TrueMemOrderLatency;
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ ChainLatency);
+ }
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (MI->mayStore()) {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ bool MayAlias = false;
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
+ const Value *V = K->first;
+ bool ThisMayAlias = K->second;
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ I->second = SU;
+ } else {
+ if (ThisMayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ MapVector<const Value *, std::vector<SUnit *> >::iterator J =
+ ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ TrueMemOrderLatency, true);
+ J->second.clear();
+ }
+ }
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we also should check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ // Add dependence on barrier chain, if needed.
+ // There is no point in checking aliasing on a barrier event. Even if
+ // SU and the barrier _could_ be reordered, they should not. In
+ // addition, we have lost all RejectMemNodes below the barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+
+ if (!ExitSU.isPred(SU))
+ // Push stores up a bit to avoid them getting in between cmps
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Artificial));
+ } else if (MI->mayLoad()) {
+ bool MayAlias = true;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (MapVector<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ } else {
+ MayAlias = false;
+ }
+
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+ const Value *V = J->first;
+ bool ThisMayAlias = J->second;
+
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ if (ThisMayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ }
+ if (MayAlias)
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
+
+ Defs.clear();
+ Uses.clear();
+ VRegDefs.clear();
+ PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ SU->getInstr()->dump();
+#endif
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
+ return oss.str();
+}
+
+/// Return the basic block label. It is not necessarily unique because a block
+/// may contain multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
+}
+
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; // Parent node (member of the parent subtree).
+ unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+ SubInstrCount(0) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
+
+ /// Return true if this node has been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
+ }
+
+ /// Initialize this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PI->getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
+
+ /// Called once for each tree edge after calling visitPostorderNode on the
+ /// predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
+ }
+
+ /// Add a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
+ ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ }
+
+ /// Set each node's subtree ID to the representative ID and record connections
+ /// between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (SparseSet<RootData>::const_iterator
+ RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
+ unsigned TreeID = SubtreeClasses[RI->NodeID];
+ if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
+ DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
+ }
+ for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+ I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+ I != E; ++I) {
+ unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = I->first->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Join the predecessor subtree with the successor that is its DFS
+ /// parent. Apply some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
+ SE = PredSU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
+ }
+};
+} // namespace llvm
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+ }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+static bool hasDataSucc(const SUnit *SU) {
+ for (SUnit::const_succ_iterator
+ SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDFSImpl Impl(*this);
+ for (ArrayRef<SUnit>::const_iterator
+ SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+ const SUnit *SU = &*SI;
+ if (Impl.isVisited(SU) || hasDataSucc(SU))
+ continue;
+
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(SU);
+ DFS.follow(SU);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+ if (DFS.isComplete())
+ break;
+ }
+ }
+ Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (SmallVectorImpl<Connection>::const_iterator
+ I = SubtreeConnections[SubtreeID].begin(),
+ E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+ SubtreeConnectLevels[I->TreeID] =
+ std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+ DEBUG(dbgs() << " Tree: " << I->TreeID
+ << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
+ OS << "BADILP";
+ else
+ OS << format("%g", ((double)InstrCount / Length));
+}
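+
+// e.g. (illustration) InstrCount=12, Length=4 prints "12 / 4 = 3", while
+// Length=0 prints "12 / 0 = BADILP".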
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..8ddb3e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,100 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 0000000..2cd84d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,248 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
+ }
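+ // e.g. (illustration) an itinerary whose deepest stage chain yields
+ // ItinDepth = 5 grows ScoreboardDepth 1 -> 2 -> 4 -> 8 and leaves
+ // MaxLookAhead = 8.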
+ }
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel->IssueWidth;
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
+}
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+#endif
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME: it would be more accurate to reserve
+ // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+ // Reduce freeUnits to a single unit: repeatedly clear the lowest set
+ // bit until only one remains (the highest free unit).
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
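+ // e.g. (illustration) freeUnits = 0b0110: the first pass clears the low
+ // bit leaving 0b0100, the second pass clears that to 0 and exits, so
+ // freeUnit = 0b0100 -- the highest set bit wins.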
+
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..eb16095
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,10214 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
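+ // For example, popping the next node to visit looks like this (see
+ // DAGCombiner::Run below):
+ //   SDNode *N;
+ //   do {
+ //     N = WorkListOrder.pop_back_val();
+ //   } while (!WorkListContents.erase(N));
+ // erase() returns false for entries already removed, so stale duplicates
+ // left in WorkListOrder are skipped.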
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// AddToWorkList - Add to the work list, making sure its instance is at
+ /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkListContents.erase(N);
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVUnaryOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node.)
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return True if some memory operations were changed.
+ bool MergeConsecutiveStores(StoreSDNode *N);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount; before type legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ }
+
+ /// isTypeLegal - This method returns true if we are running before type
+ /// legalization or if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+ };
+}
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+};
+}
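+
+// WorkListRemover is used as a scoped listener around replacement calls,
+// for example:
+//   WorkListRemover DeadNodes(*this);
+//   DAG.ReplaceAllUsesWith(N, To);
+// Any node deleted while DeadNodes is in scope is also removed from the
+// worklist, so the combiner never pops a dangling SDNode.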
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
+ unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
+ }
+}
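+
+// A typical caller pairs the two helpers: it only builds the negated form
+// once isNegatibleForFree reports a non-zero cost class. A sketch (the exact
+// call sites appear later in this file; the operands here are illustrative):
+//   if (isNegatibleForFree(N1, LegalOperations, TLI,
+//                          &DAG.getTarget().Options))
+//     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+//                        GetNegatedExpression(N1, DAG, LegalOperations));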
+
+/// GetNegatedExpression - If isNegatibleForFree returned a non-zero value,
+/// this function returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(DAG.getTarget().Options.UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(DAG.getTarget().Options.UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
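+
+// For example, (select_cc x, y, 1, 0, cc) computes exactly the same value as
+// (setcc x, y, cc), so both shapes report LHS = x, RHS = y, CC = cc.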
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
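+
+// Two concrete instances of the reassociations above, in fold notation:
+//   (add (add x, 3), 5) -> (add x, 8)
+//   (add (add x, 3), y) -> (add (add x, y), 3)   if (add x, 3) has one use
+// Keeping the constant outermost exposes it to later constant-RHS folds.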
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To);
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
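+
+// For example, demanding all bits of (and (srl x, 28), 0xF) on i32 lets the
+// target hook prove the mask is redundant: the shift already clears all but
+// the low four bits, so TLO.New becomes (srl x, 28) and the AND goes away.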
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ DebugLoc dl = Load->getDebugLoc();
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
+ removeFromWorkList(Load);
+ DAG.DeleteNode(Load);
+ AddToWorkList(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ DebugLoc dl = Op.getDebugLoc();
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial, e.g. on x86 it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (NN0.getNode() == 0)
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
+
+ AddToWorkList(NN0.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
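+
+// The resulting shape, e.g. for an i16 add promoted to i32 when the target
+// hook asks for it:
+//   (add:i16 a, b) -> (trunc:i16 (add:i32 (ext:i32 a), (ext:i32 b)))
+// where ext is whichever extension PromoteOperand chose for each operand.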
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial, e.g. on x86 it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (N0.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ AddToWorkList(Result.getNode());
+ return true;
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // set the instance variables, so that the various visit routines may use it.
+ Level = AtLevel;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
+
+ // Add all the dag nodes to the worklist.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ AddToWorkList(I);
+
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // While the worklist isn't empty, find a node and try to combine it.
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // WorkListOrder holds the SDNodes in visitation order, but it may contain
+ // duplicates. To avoid a linear scan, we use a set (O(log N)) to hold what
+ // the worklist *should* contain, and check that the node we are about to
+ // visit should actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+ // Add any uses of the old node to the worklist in case this node is the
+ // last one that uses them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If nothing happened still, try promoting the operation.
+ if (RV.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
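+
+// For example, if (xor a, b) is being combined and (xor b, a) already exists
+// in the DAG, getNodeIfExists finds the existing node and it is returned
+// instead of keeping two equivalent nodes alive.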
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
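+ // For example, in (TokenFactor (load ch, p):1, ch) the load's input chain
+ // is the other operand, so the load's chain result already orders both
+ // and the token factor can be replaced by it.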
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs list grows when new token factors
+ // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned j = 0, je = TF->getNumOperands(); j != je; ++j) {
+ SDValue Op = TF->getOperand(j);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall through.
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ } while (!N->use_empty());
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
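+
+// A worked instance of the fold above, with c1 = 3 and c2 = 2:
+//   (add (shl (add x, 3), 2), y) -> (add (add (shl x, 2), 12), y)
+// since (x + 3) << 2 == (x << 2) + (3 << 2) and 3 << 2 == 12.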
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
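+ // For example, (add (shl x, 8), (and y, 255)) on i32: the left operand has
+ // its low 8 bits known zero and the right operand has its high 24 bits
+ // known zero, so no carry is possible and the add becomes an or.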
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1): a sign-extended i1 is 0 or -1, so
+ // adding it is the same as subtracting the zero-extended value (0 or 1).
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+// Since it may not be valid to emit a fold to zero for vector initializers
+// check if we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+ VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
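+ //   e.g. (mul x, 16) -> (shl x, 4)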
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
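+ //   e.g. (mul x, -4) -> (sub 0, (shl x, 2))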
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
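+ //   e.g. (mul (shl X, 3), 5) -> (mul X, 40)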
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
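+ //   e.g. (mul (add x, 4), 3) -> (add (mul x, 3), 12)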
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getAPIntValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
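+ // For example, for i32 "x / 8" (lg2 == 3) the sequence below computes
+ //   SGN = x >> 31, SRL = SGN >>u 29 (7 when x < 0, else 0),
+ //   ADD = x + SRL, SRA = ADD >> 3,
+ // which rounds the quotient toward zero as required.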
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? (1 << lg2) - 1 : 0 to bias negative dividends toward zero.
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy(SGN.getValueType())));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (N1C->getAPIntValue().isNonNegative())
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // If integer divide is expensive and the divisor is a non-zero constant,
+ // emit an alternate sequence.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
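+ //   e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))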
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
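+ //   e.g. if (sdiv X, 10) combines into a multiply+shift sequence, then
+ //   (srem X, 10) becomes X - (X/10)*10 built on top of that sequence.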
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
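+ //   e.g. (urem x, 8) -> (and x, 7)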
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
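+ //   e.g. for i32: (mulhs x, 1) -> (sra x, 31), i.e. 0 or -1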
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
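+ //   e.g. for i16, when i32 MUL is legal:
+ //   (mulhs a, b) -> (trunc (srl (mul (sext a), (sext b)), 16))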
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Return the simplified value, or a null SDValue
+/// if no simplification was made.
+///
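+/// For example, an (smul_lohi a, b) whose high result has no uses can be
+/// replaced by a single ISD::MUL when that operation is legal.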
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, leave the node as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the smul_lohi into a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the umul_lohi into a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // Do not sink a logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level == AfterLegalizeTypes) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ DebugLoc DL = N->getDebugLoc();
+ // If both incoming values are integers, and the original types are the
+ // same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
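+ //   e.g. (xor (shuffle A, undef, M), (shuffle B, undef, M))
+ //        -> (shuffle (xor A, B), undef, M)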
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, C), D) -> D if (C & D) == D
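+ //   e.g. (and (or x, 0xFF00), 0x0F00) -> 0x0F00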
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // The 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) that the zeroth operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (BitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+ SplatBitSize < BitWidth;
+ SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // Canonicalize to LL == RL by swapping the operands of the RHS setcc.
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
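+ //   e.g. when narrowing an i32 load to an i8 zextload on a
+ //   big-endian target, PtrOff = 4 - 1 = 3 selects the low byte.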
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
+ // immediate for an add, but becomes legal once its top c2 bits are
+ // set, transform the ADD so the immediate doesn't need to be
+ // materialized in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap, i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR &&
+ N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // Result of the bswap should be rotated by 16. If that's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N1;
+ }
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) != 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // Canonicalize to LL == RL by swapping the operands of the RHS setcc.
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded 'n promoted things won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
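+ //   e.g. for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8)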
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
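+
+ // Worked example (illustrative): for i32, (or (shl x, 8), (srl x, 24)) has
+ // LShVal + RShVal == 8 + 24 == 32 == OpSizeInBits, so it becomes
+ // (rotl x, 8). If the shl half had been ANDed with 0xFFFF0000, the low 8
+ // bits of the rotate still come intact from the srl half, so the combined
+ // mask is 0xFFFF0000 | 0xFF and the result is
+ // (and (rotl x, 8), 0xFFFF00FF).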
+
+ // If there is a mask here and the shift amount is variable, we cannot be
+ // sure the mask covers exactly the bits the rotate would produce, so bail
+ // out.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ }
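+
+ // Illustrative example: for i32, (or (shl x, y), (srl x, (sub 32, y)))
+ // matches here with SUBC == 32 == OpSizeInBits and becomes (rotl x, y),
+ // or (rotr x, (sub 32, y)) on a target that only has ROTR.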
+
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign-, zero-, or any-extended or truncated shift amounts:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
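+
+ // Example (illustrative): (xor (setcc x, y, seteq), 1) becomes
+ // (setcc x, y, setne) whenever the inverted condition code is legal.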
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The HighBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
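+
+// Illustrative walk-through (an annotation, not part of the original
+// source): for (shl (add (shl x, 2), 5), 3) this produces
+// (add (shl (shl x, 2), 3), (shl 5, 3)); the inner shl pair then folds to
+// (shl x, 5) and the constant to 40, and ((x << 2) + 5) << 3 equals
+// (x << 5) + 40 for all x, including under wraparound.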
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
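+
+ // Example (illustrative): on i32, (shl (shl x, 10), 25) has
+ // c1 + c2 == 35 >= 32 and folds to 0, while (shl (shl x, 10), 5) folds to
+ // (shl x, 15).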
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ //                               (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
+ }
+ }
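+
+ // Worked example (illustrative): on i32, (shl (srl x, 8), 2) has c1 == 8
+ // and c2 == 2, so the fold produces (and (srl x, 6), 0x03FFFFFC): shifting
+ // right by the difference and masking off the bits the original pair of
+ // shifts would have cleared.
+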
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
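+
+ // Example (illustrative): on i32, (shl (sra x, 8), 8) preserves bits 8..31
+ // of x and clears the low 8 bits, so it folds to (and x, 0xFFFFFF00).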
+
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, when the target
+ // supports sext_inreg on the corresponding narrower type.
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
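+
+ // Example (illustrative): on i32, (sra (shl x, 24), 24) sign-extends the
+ // low 8 bits of x, so it becomes (sext_inreg x, i8) when that operation
+ // is legal.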
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
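+
+ // Example (illustrative): on i32, (sra (sra x, 20), 20) would shift by 40,
+ // so the sum is clamped to 31 and the fold yields (sra x, 31), which still
+ // produces the broadcast sign bit.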
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target sext(shl) is likely to result in better
+ // code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts: CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the truncate's result type is legal, sign_extend is
+ // legal on that type, and the truncate to that type is both legal and
+ // free, perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
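+
+ // Example (illustrative): on i32, (srl (shl x, 8), 8) clears the top 8
+ // bits, so ShAmt == 8 + 64 - 32 == 40 and the mask is
+ // ~0ULL >> 40 == 0x00FFFFFF, giving (and x, 0x00FFFFFF).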
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), log2(size(x))): the shift isolates the one bit of
+ // the ctlz result that is set only when x is zero, so the whole expression
+ // computes (x == 0).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is the bit width (e.g. 32) and the result of
+ // the shift is one.
+ APInt UnknownBits = ~KnownZero;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
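+
+ // Worked example (illustrative): on i32 with x known to be either 0 or 16
+ // (only bit 4 unknown), ctlz(x) is 32 when x == 0 and 27 otherwise, so
+ // (srl (ctlz x), 5) is 1 iff x == 0. The fold above rewrites this as
+ // (xor (srl x, 4), 1), avoiding the ctlz entirely.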
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, once the source operand of the SRL has been optimized into an
+ // AND, the SRL itself may not be revisited. Look through the uses and add
+ // the BRCOND to the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents(false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
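+
+ // Example (illustrative): for i1 values the folds above turn
+ // (select C, 1, Y) into (or C, Y) and (select C, Y, 0) into (and C, Y),
+ // canonicalizing boolean selects into plain logic.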
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extensions are possible and the
+// transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already sign extended enough and can be used directly.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
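+
+ // Illustrative example: for (sext (trunc x to i8) to i32) with x : i32,
+ // if ComputeNumSignBits(x) > 24 then every truncated bit was a sign copy,
+ // so the trunc/sext pair is a no-op and x is returned directly.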
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
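+
+ // Example (illustrative): if x : i32 has its top 24 bits known zero, then
+ // (zext (trunc x to i8) to i32) truncates away only zero bits, and the
+ // fold above returns x unchanged.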
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
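+
+ // Example (illustrative): (zext (trunc x to i8) to i32) with x : i32
+ // becomes (and x, 255); masking with the low bits of the truncated type
+ // has the same effect as the trunc/zext pair.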
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the result is the same as the
+      // # elements of the compare (and the # elements of the compare result
+      // for that matter). Check to see that they are the same size. If so,
+      // we know that the element size of the extended result matches the
+      // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+      // truncate/sign extend.
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
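+/// For example, with Mask == 0xFF (only the low byte demanded),
+/// (or (shl x, 8), y) simplifies to y, since the shl contributes nothing
+/// below bit 8.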
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+    // If the LHS or RHS doesn't contribute bits to the OR/XOR, drop it.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of the
+/// narrower type's bit width, transform it to a narrower load from
+/// address + N/8. If the result is to be extended, also fold the extension
+/// to form an extending load.
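+/// For example (little endian): (i8 (trunc (srl (i32 (load p)), 16))) can
+/// become (i8 (load p+2)), since the shift amount 16 is a multiple of the
+/// narrow type's 8 bits and 16/8 == 2 selects the demanded byte.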
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extended to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // Because a SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+ // multiple uses, this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ // Don't change the width of a volatile load.
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LN0->getNumValues() > 2)
+ return SDValue();
+
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
+  // For big endian targets, we need to adjust the pointer offset so we
+  // load the correct bytes.
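+  // e.g. narrowing a big-endian i32 load to its low i8 (ShAmt == 0): that
+  // byte lives at byte offset 3, and ShAmt becomes 32 - 8 - 0 == 24, giving
+  // PtrOff == 24/8 == 3 below.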
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorkList(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ EVT IndexTy = N0->getOperand(1).getValueType();
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, IndexTy));
+ }
+ }
+
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (X.getOpcode() != ISD::UNDEF) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+      // Stop if more than one member is non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+ AddToWorkList(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ &Opnds[0], Opnds.size());
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
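+/// For example, (build_pair (i32 (load p)), (i32 (load p+4))) can become a
+/// single (i64 (load p)) on a little-endian target, provided the wide load
+/// is legal and needs no higher alignment than the originals.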
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+ // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+  // if the resulting load doesn't need a higher alignment than the original.
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getDataLayout()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
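+  // e.g. for f32 -> i32: (bitcast (fneg x)) -> (xor (bitcast x), 0x80000000)
+  // and (bitcast (fabs x)) -> (and (bitcast x), 0x7fffffff).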
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
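+  // e.g. for f32 with cst == 2.0f (bits 0x40000000), this yields
+  // (or (and (i32 (bitcast x)), 0x80000000), 0x40000000), i.e. x's sign
+  // bit combined with 2.0f's magnitude bits.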
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
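+/// For example, bitcasting (v4i16 build_vector 1, 2, 3, 4) to v2i32 on a
+/// little-endian target folds to (v2i32 build_vector 0x20001, 0x40003).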
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are
+    // the same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is a FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+  // Okay, we know the src/dst types are both integers of differing widths.
+  // Handle the growing case first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fadd c1, c2) -> c1 + c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+  // No FP constant should be created after legalization, as the instruction
+  // selection pass has a hard time dealing with FP constants.
+  //
+  // We don't need to test this condition for transformations like the
+  // following, as the DAG being transformed already implies it is legal to
+  // take an FP constant as an operand.
+ //
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ //
+ bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
+  // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+  // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+ N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+ // In unsafe math mode, we can fold chains of FADD's of the same value
+ // into multiplications. This transform is not safe in general because
+ // we are reducing the number of rounding steps.
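+  // e.g. (fadd (fadd x, x), x) rounds after each add, while the combined
+  // (fmul x, 3.0) rounds only once, so results can differ in the last ulp.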
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul x, c), x) -> (fmul c+1, x)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+
+ // (fadd x, (fmul c, x)) -> (fmul c+1, x)
+ if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fmul x, c)) -> (fmul c+1, x)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+      // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
+      if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
+          N0.getOperand(0) == N0.getOperand(1) &&
+          N1.getOperand(1) == N0.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP10, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N1.getOperand(1), NewCFP);
+      }
+
+      // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
+      if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+          N0.getOperand(0) == N0.getOperand(1) &&
+          N1.getOperand(0) == N0.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP11, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N1.getOperand(0), NewCFP);
+      }
+ }
+
+ if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
+ if (AllowNewFpConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getConstantFP(4.0, VT));
+ }
+ }
+
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, x) -> 0.0 &
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, VT);
+
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+    // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FMUL &&
+ N0 == N2.getOperand(0) &&
+ N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ }
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FMUL && N1CFP &&
+ N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+ N2);
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ AddToWorkList(RHSNeg.getNode());
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ }
+ }
+
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, VT)));
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, VT)));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+    // isn't too nasty (e.g. NaN, denormal, ...).
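+    // e.g. (fdiv X, 4.0) -> (fmul X, 0.25) is exact (opOK), while
+    // (fdiv X, 3.0) -> (fmul X, 0.333...) is opInexact but still allowed.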
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
+  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+  // having to say they don't support SELECT_CC on every type the DAG knows
+  // about, since there is no way to mark an opcode illegal for all value
+  // types. (See also visitSELECT.)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
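+    // (A true i1 value is -1 when interpreted as signed, hence the -1.0 arm.)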
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+    //      (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
+  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+  // having to say they don't support SELECT_CC on every type the DAG knows
+  // about, since there is no way to mark an opcode illegal for all value
+  // types. (See also visitSELECT.)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+
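+    // (A true boolean is 1 when interpreted as unsigned, hence the 1.0 arm.)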
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
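+  // e.g. rounding f80 -> f64 -> f32 becomes a single f80 -> f32 round; the
+  // combined round is marked truncating only if both original rounds were.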
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && isTypeLegal(EVT)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> X, since the fp_round doesn't affect
+  // the value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ VT, Int);
+ }
+ }
+
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ N0.getOperand(1)));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
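+    //
+    // In the example above, the AND constant 2 has a single bit set and the
+    // SRL amount 1 equals log2(2), so the pattern matches.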
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
+ // Replace the uses of SRL with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode()) {
+ if (Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+
+        // visitXOR has changed XOR's operands or replaced the XOR completely;
+        // bail out.
+ return SDValue(N, 0);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded into the load / store
+/// addressing mode.
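+///
+/// For illustration: if N is (add %base, 8) and Use loads from N, this asks
+/// the target whether [%base + 8] is a legal addressing mode for the loaded
+/// type.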
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
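+///
+/// For example, on a target with pre-increment addressing, a load from
+/// (add %p, 4) where the add has other uses can become a pre-indexed load
+/// that also produces %p + 4 for those other uses.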
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+  // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
+ E = BasePtr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == Ptr.getNode())
+ continue;
+
+ if (Use->isPredecessorOf(N))
+ continue;
+
+ if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
+ if (Op1.getNode() == BasePtr.getNode())
+ std::swap(Op0, Op1);
+ assert(Op0.getNode() == BasePtr.getNode() &&
+ "Use of ADD/SUB but not an operand");
+
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use);
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+    // If Ptr may be folded into the addressing mode of another use, then it's
+    // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ APInt OV =
+ cast<ConstantSDNode>(Offset)->getAPIntValue();
+ if (AM == ISD::PRE_DEC)
+ OV = -OV;
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ APInt CNV = CN->getAPIntValue();
+ if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+ CNV += OV;
+ else
+ CNV -= OV;
+
+ SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+ if (OffsetIdx == 0)
+ std::swap(NewOp1, NewOp2);
+
+ SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ OtherUses[i]->getDebugLoc(),
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ removeFromWorkList(OtherUses[i]);
+ DAG.DeleteNode(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// After the transformation, the add / subtract is effectively folded into
+/// the new indexed load / store, and all of its uses are redirected to the
+/// new load / store.
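+///
+/// For example, a load from %p followed by (add %p, 4) can become a
+/// post-indexed load that produces both the loaded value and %p + 4.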
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr (and
+      //    it may be folded into the addressing mode).
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, folding Op would create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+        // Replace the uses of Op with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0));
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ SDValue NewLoad =
+ DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), Align);
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+      // Replace the chain to avoid a dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and (load ptr), imm), where the
+/// load has specific bytes cleared out. If so, return the byte size being
+/// masked out and the shift amount.
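+///
+/// For example, with V = (and (i32 load %p), 0xFFFF00FF) the mask clears
+/// byte 1, so this returns {1, 1}: one byte masked out, at byte offset 1.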
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+ if (Chain->getOperand(i).getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+  // Check the constant mask. Invert it so that the bits being masked out
+  // become 1 and the bits being kept become 0. Use getSExtValue so that
+  // leading bits follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+  // See if we have a contiguous run of bits. If so, we have 0*1+0*.
+ if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+  // Verify that the masked region starts at a byte offset that is a multiple
+  // of the access width, so the narrowed access keeps natural alignment.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo. If so, replace the specified
+/// store with a narrower store of truncated IVal.
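+///
+/// Continuing the example above ({1, 1}), on a little-endian target the i32
+/// store becomes an i8 store of (trunc (srl IVal, 8)) at byte offset 1.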
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+  // Check that IVal is zero everywhere outside the bytes being masked in by
+  // the 'or' that uses this; if not, the narrow store cannot replace it.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return 0;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift)
+ IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8,
+ DC->getShiftAmountTy(IVal.getValueType())));
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getTargetLoweringInfo().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for a sequence of load / op / store where op
+/// is one of 'or', 'xor', or 'and' with an immediate. If 'op' only touches
+/// some of the loaded bits, try narrowing the load and store if it would end
+/// up being a win for performance or code size.
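+///
+/// For example, (store (or (i32 load %p), 0x00FF0000), %p) changes only one
+/// byte, so it may be narrowed to an i8 load / or / store of that byte when
+/// the target considers the narrowing profitable.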
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load + replace + store sequence with a single (narrower) store, which makes
+ // the load dead.
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+    // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+    // If the changed bits do not start at a NewBW-sized boundary, round the
+    // shift amount down to the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+      // For big-endian targets, we need to adjust the pointer offset so that
+      // we load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff),
+ false, false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
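+///
+/// For example, match() decomposes (add %base, (add (sext %idx), 16)) into
+/// Base = %base, Index = %idx, Offset = 16, IsIndexSignExt = true.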
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+  /// Parses the address tree in Ptr into base, index, and offset parts.
+ static BaseIndexOffset match(SDValue Ptr) {
+ bool IsIndexSignExt = false;
+
+ // Just Base or possibly anything else.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Base + offset.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+  // The sequence number of this mem node; the lowest mem operand in the DAG
+  // starts at zero.
+ unsigned SequenceNum;
+};
+
+/// Sorts store nodes in a chain according to their offset from a shared
+/// base ptr.
+struct ConsecutiveMemoryChainSorter {
+ bool operator()(MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+
+ // Don't merge vectors into wider inputs.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants or loads.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+ !IsLoadSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 1);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction of a different kind or
+  // one with a different base pointer.
+ unsigned Seq = 0;
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Check that the alignment is the same.
+ if (Index->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // No truncation.
+ if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+ if (St->isTruncatingStore())
+ break;
+
+ // The stored memory type must be the same.
+ if (Index->getMemoryVT() != MemVT)
+ break;
+
+    // We require naturally aligned stores, to keep merged stores from
+    // partially overwriting one another.
+ if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+ break;
+
+ // We found a potential memory operand to merge.
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
+
+    // Find the next memory operand in the chain. If the next operand in the
+    // chain is a store, then move up and continue the scan with the next
+    // memory operand. If the next operand is a load, save it and use alias
+    // information to check whether it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (1) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ // Save the load node for later. Continue the scan.
+ AliasLoadNodes.push_back(Ldn);
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = NULL;
+ break;
+ }
+ }
+ }
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the base pointer.
+ std::sort(StoreNodes.begin(), StoreNodes.end(),
+ ConsecutiveMemoryChainSorter());
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // store memory address.
+ unsigned LastConsecutiveStore = 0;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ if (i > 0) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ }
+
+ bool Alias = false;
+ // Check if this store interferes with any of the loads that we found.
+ for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+ if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+ Alias = true;
+ break;
+ }
+    // We found a load that aliases with this store. Stop the sequence.
+ if (Alias)
+ break;
+
+ // Mark this node as useful.
+ LastConsecutiveStore = i;
+ }
+
+ // The node with the lowest store address.
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+ // Store the constants into memory as one consecutive store.
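+  // For example, four consecutive byte stores of constants may become a
+  // single i32 store of the combined immediate when i32 is legal.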
+ if (!IsLoadSrc) {
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalType = i+1;
+ // Or check whether a truncstore is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
+ LastLegalType = i+1;
+ }
+
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ LastLegalVectorType = i + 1;
+ }
+
+ // We only use vectors if the constant is known to be zero and the
+ // function is not marked with the noimplicitfloat attribute.
+ if (NonZero || NoVectors)
+ LastLegalVectorType = 0;
+
+ // Check if we found a legal integer type to store.
+ if (LastLegalType == 0 && LastLegalVectorType == 0)
+ return false;
+
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ unsigned EarliestNodeUsed = 0;
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
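+      // For example, little-endian i8 stores of {0x11, 0x22, 0x33, 0x44} at
+      // offsets 0..3 pack into the i32 constant 0x44332211.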
+ bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem; ++i) {
+        unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt <<= ElementSizeBytes * 8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt |= C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          assert(false && "Invalid constant element type");
+        }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change it's chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
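+ //
+ // For example, a memcpy-like pattern of four consecutive i8 loads feeding
+ // four consecutive i8 stores can become a single i32 load feeding a single
+ // i32 store, provided i32 is legal and the chain, alignment and volatility
+ // checks below succeed.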
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be extending, volatile, or indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld) break;
+
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
+
+ // Check that the load's alignment is the same as that of the store.
+ if (Ld->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
+
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.Base.getNode()) {
+ // The base ptr must be the same.
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
+ break;
+ } else {
+ // Remember the first base pointer; all later loads must match it.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ }
+
+ if (LoadNodes.size() < 2)
+ return false;
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // load memory address. This variable holds the index into the load node
+ // array.
+ unsigned LastConsecutiveLoad = 0;
+ // These variables refer to element counts, not indices into the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalVectorType = i + 1;
+
+ // Find a legal type for the integer store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalIntegerType = i + 1;
+ // Or check whether a truncstore and extload is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ LastLegalIntegerType = i+1;
+ }
+ }
+
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to the last index in
+ // the array, while NumElem is a count of elements.
+ unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2)
+ return false;
+
+ // The earliest Node in the DAG.
+ unsigned EarliestNodeUsed = 0;
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ for (unsigned i=1; i<NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // Decide whether it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ }
+
+ DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+ DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+ FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(),
+ false, false, false,
+ FirstLoad->getAlignment());
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false,
+ FirstInChain->getAlignment());
+
+ // Replace one of the loads with the new load.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+
+ // Remove the rest of the load chains.
+ for (unsigned i = 1; i < NumElem ; ++i) {
+ // Replace all chain users of the old load nodes with the chain of the new
+ // load node.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ }
+
+ // Replace the first store with the new store.
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
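+ // For example, (store (i32 (bitcast f32 X)), p) can become
+ // (store f32 X, p) when an f32 store is legal and sufficiently aligned.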
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ EVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getDataLayout()->
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
+ // Turn 'store float 1.0, Ptr' -> 'store i32 0x3F800000, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
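+ // For example, "store double 1.0, p" (bit pattern 0x3FF0000000000000)
+ // becomes "store i32 0, p" plus "store i32 0x3FF00000, p+4" on a
+ // little-endian target; the two halves are swapped on big-endian.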
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(), Align);
+ }
+ }
+
+ // Try transforming a pair floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Only perform this optimization before the types are legal, because we
+ // don't want to perform this optimization on every DAGCombine invocation.
+ if (!LegalTypes) {
+ bool EverChanged = false;
+
+ do {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ EverChanged |= Changed;
+ if (!Changed) break;
+ } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+ if (EverChanged)
+ return SDValue(N, 0);
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
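+ // For example, (insert_vector_elt (build_vector a, b, c, d), x, 2)
+ // becomes (build_vector a, b, x, d).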
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
+ unsigned NElts = VT.getVectorNumElements();
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
+ }
+
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ VT, &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val), 0) -> val
+ SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = InVec.getOperand(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
+
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT (VECTOR_SHUFFLE)) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example, on AVX we could end up extracting elements from a
+ // wide vector without using extract_subvector.
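+ //
+ // For example, (extract_vector_elt (vector_shuffle A, B, <3,1,6,4>), 2)
+ // becomes (extract_vector_elt B, 2), since mask element 6 selects
+ // element 2 of the second input vector.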
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ EVT IndexTy = N->getOperand(1).getValueType();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, IndexTy));
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+
+ if (ConstEltNo) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ InVec = InVec.getOperand(0);
+ }
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ }
+ }
+
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getDataLayout()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
+ if (Elt) {
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization earlier may create bit-casts which
+ // will be type-legalized into complex code sequences.
+ // We perform this optimization only before the operation legalizer because
+ // we may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
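+ //
+ // For example, (build_vector (zext i8 a to i32), (zext i8 b to i32)) can
+ // become (bitcast (build_vector a, 0, 0, 0, b, 0, 0, 0)) on a little-endian
+ // target, where the zero elements fill the extended bits.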
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = VT.getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
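+ // Simplify (build_vec (conv x)) to (conv (build_vec x)) for int-to-fp
+ // conversions, e.g. (build_vector (sint_to_fp a), (sint_to_fp b)) becomes
+ // (sint_to_fp (build_vector a, b)) when the vectorized conversion is legal.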
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ // Record the first int-to-fp conversion opcode; all inputs must match it.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+ }
+
+ if (Opc != Opcode)
+ return SDValue();
+
+ EVT InVT = In.getOperand(0).getValueType();
+
+ // If the scalar values have different types, bail out. This restriction
+ // is chosen to simplify the handling of BUILD_VECTORs of integer types.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+ // If the vector has just one element defined, it's not worth folding it
+ // into a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
+
+ assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+ && "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+
+ if (In.getOpcode() == ISD::UNDEF)
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ &Opnds[0], Opnds.size());
+ AddToWorkList(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ SDValue V = reduceBuildVecExtToExtBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ V = reduceBuildVecConvertToConvertBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // We allow up to two distinct input vectors.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if (VT != VecIn1.getValueType()) {
+ // We don't support shuffling between two values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // If the element type of the input vector differs from that of the
+ // output vector type, bail out.
+ if (VecIn1.getValueType().getVectorElementType() !=
+ VT.getVectorElementType())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same
+ // type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2;
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked, as 'concat' assumes all inputs have the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ DebugLoc dl = N->getDebugLoc();
+ // Handle only the simple case where the vectors being inserted and
+ // extracted have the same type and are half the size of the larger vector.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ N->getOperand(0).getValueType(),
+ V->getOperand(0)), N->getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
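+ // For example, shuffle undef, V, <4,5,1,7> becomes
+ // shuffle V, undef, <0,1,5,3>.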
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+ // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle, then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
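+ // For example, shuffle(shuffle(x, undef, <2,3,0,1>), undef, <2,3,0,1>)
+ // returns x, because applying the swizzle twice maps every lane back to
+ // its original position.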
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
+ return SDValue();
+}
+
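+ // If an atomic operation's chain is a MEMBARRIER, and the target allows
+ // folding fences into atomics, bypass the fence by rewiring the atomic
+ // node to the fence's input chain.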
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
+ /// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+ /// transform an AND to a vector_shuffle with the destination vector and a
+ /// zero vector.
+ /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
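+ // For example, (add (build_vector 1, 2), (build_vector 10, 20)) folds to
+ // (build_vector 11, 22).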
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ EVT VT = LHSOp.getValueType();
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
+ }
+
+ return SDValue();
+}
+
+ /// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVUnaryOp only works on vectors!");
+
+ SDValue N0 = N->getOperand(0);
+
+ if (N0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.getOpcode() != ISD::UNDEF &&
+ Op.getOpcode() != ISD::ConstantFP)
+ break;
+ EVT EltVT = Op.getValueType();
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() != N0.getNumOperands())
+ return SDValue();
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ N0.getValueType(), &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2) {
+ assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->isInvariant(), LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float), and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = *TLI.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ AddToWorkList(Cond.getNode());
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ AddToWorkList(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ AddToWorkList(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false,
+ false, false, Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
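+  // e.g. on i32, (a < 0) ? b : 0 becomes (and (sra a, 31), b), since
+  // (sra a, 31) is all-ones exactly when a is negative and zero otherwise.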
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+      // "and (sra X, size(X)-1), A" -> "and (srl X, C2), A" iff A is a
+ // single-bit constant.
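+      // e.g. on i32 with A == 4 (bit 2 set): (srl X, 29) moves the sign bit
+      // down to bit 2, so the AND with A yields 4 when X < 0 and 0 otherwise.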
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV,
+ getShiftAmountTy(N0.getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
+  // where y has a single bit set.
+  // In plain terms: we can turn the SELECT_CC into an AND when the condition
+  // can be materialized as an all-ones register. Any single bit-test can be
+  // materialized as an all-ones register with shift-left and
+  // shift-right-arith.
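+  //
+  // e.g. for y == 0x8 on i32: shl x by countLeadingZeros(y) == 28 moves the
+  // tested bit into the sign position, and sra by 31 smears it into either
+  // all-ones or zero, which the final AND applies to A.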
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1,
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
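+  // (Zero-extend the i1 setcc result, then shift it left by log2(16) == 4.)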
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents(N0.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // NOTE: Don't create a SETCC if it's not legal on this target.
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC,
+ LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+ N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+ }
+
+ // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
+  // ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ EVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+  // fold (seteq X, 0) -> (srl (ctlz X), log2(size(X)))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy(Ctlz.getValueType())));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(XType)));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
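+  // e.g. on i32, Y = sra(X, 31) is 0 or -1; (X + Y) xor Y is X when X >= 0
+  // and ~(X - 1) == -X when X < 0, i.e. |X| (modulo INT_MIN overflow).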
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+ EVT XType = N0.getValueType();
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ const GlobalValue *&GV, const void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant, integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+ : (const void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+ // If it's any of the following then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ const GlobalValue *GV1, *GV2;
+ const void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address, check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+  // If SrcValue1 and SrcValue2 are known to have relatively large alignment
+  // compared to the size and offset of the access, we may be able to prove
+  // they do not alias. This check is conservative for now to catch cases
+  // created by splitting vector types.
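+  // e.g. two 4-byte accesses whose SrcValues are both 16-byte aligned, at
+  // offsets 0 and 8, land in disjoint slots of the alignment window and
+  // therefore cannot overlap.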
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+ // size, return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
+ SDValue Ptr0, Ptr1;
+ int64_t Size0, Size1;
+ const Value *SrcValue0, *SrcValue1;
+ int SrcValueOffset0, SrcValueOffset1;
+ unsigned SrcValueAlign0, SrcValueAlign1;
+ const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
+ FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0);
+ FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+ return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0,
+ Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+ // find more and revert to original chain since the xform is unlikely to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
+
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
+
+      // If the chain aliases, stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor for "small"
+      // token factors, so we queue them up. Adding the operands to the queue
+      // (stack) in reverse order maintains the original order and increases
+      // the likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..9ac738e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1507 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ EmitStartPt = 0;
+
+ // Advance the emit start point past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ EmitStartPt = I;
+ ++I;
+ }
+ LastLocalValue = EmitStartPt;
+}
+
+bool FastISel::LowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fallback to SDISel argument lowering code to deal with sret pointer
+ // parameter.
+ return false;
+
+ if (!FastLowerArguments())
+ return false;
+
+ // Enter non-dead arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ if (!I->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[I] = VI->second;
+ }
+ }
+ return true;
+}
+
+void FastISel::flushLocalValueMap() {
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg != 0)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
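+      // e.g. the f64 constant 2.0 converts exactly to the integer 2, so we
+      // can materialize 2 and SINT_TO_FP it, avoiding a constant pool load.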
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
+ }
+ }
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
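+  // (GEP indices are treated as signed, hence the SIGN_EXTEND below.)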
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
+ recomputeInsertPt();
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
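+  // e.g. "add 5, %x" is selected as "add %x, 5" so the immediate form can be
+  // used; this is only safe for commutative operations.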
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
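+  // e.g. two constant struct-field offsets of 8 and 4 are folded into a
+  // single N = N + 12 before any non-constant index forces a flush.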
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ // N = N + Offset
+ TotalOffs +=
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
+ const Function *F = Call->getCalledFunction();
+ if (!F) return false;
+
+ // Handle selected intrinsic function calls.
+ switch (F->getIntrinsicID()) {
+ default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
+ return true;
+
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ if (!DIVariable(DI->getVariable()).Verify() ||
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ if (!Reg)
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ }
+
+  // Usually, it does not make sense to initialize a value, make an unrelated
+  // function call, and then use the value, because the value tends to be
+  // spilled on the stack. So, we move the pointer to the last local value to
+  // the beginning of the block, so that all the values which have already
+  // been materialized appear after the call. It also makes sense to skip
+  // intrinsics since they tend to be inlined.
+ if (!isa<IntrinsicInst>(Call))
+ flushLocalValueMap();
+
+ // An arbitrary call. Bail.
+ return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT == DstVT) {
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DL = DebugLoc();
+ return true;
+ }
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
+
+ // Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
+ if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
+ DL = DebugLoc();
+ return true;
+ }
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+ DL = DebugLoc();
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+  if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+      FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+    // The unconditional fall-through case: MSucc is the layout successor and
+    // this branch is not the only instruction in the block, so no branch
+    // instruction is needed. (If it were the only instruction, we would emit
+    // it for more accurate line information.)
+  } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
+ }
+ FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg, OpRegIsKill);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+    // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
+ TD(*TM.getDataLayout()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
+}
+
+FastISel::~FastISel() {}
+
+bool FastISel::FastLowerArguments() {
+ return false;
+}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper around FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
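+/// (e.g. mul X, 8 is rewritten to shl X, 3 below; if no ri form exists, the
+/// immediate is materialized into a register and the rr form is used.)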
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // udiv x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed): check that shift amounts are in range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if the immediate type is legal. If not, we can't use the
+ // ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ assert(MaterialReg != 0 && "Unable to materialize imm.");
+ if (MaterialReg == 0) return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
+}
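+
+// Illustrative sketch of the fallback above (register names and the i32
+// type are made up): a power-of-two multiply is first rewritten to a shift,
+//
+//   %y = mul i32 %x, 8  -->  FastEmit_ri(MVT::i32, MVT::i32, ISD::SHL,
+//                                        %x, Op0IsKill, /*Imm=*/3)
+//
+// and if the target has no matching ri pattern, the immediate is
+// materialized and the rr form is tried instead:
+//
+//   %c = FastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 3)
+//   %y = FastEmit_rr(MVT::i32, MVT::i32, ISD::SHL, %x, ..., %c, ...)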
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
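+
+// Schematically, for a boolean whose register may hold junk in the upper
+// bits (register names are illustrative):
+//
+//   %clean = FastEmitZExtFromI1(MVT::i32, %b, /*Op0IsKill=*/false)
+//     ==>  %clean = AND %b, 1   ; only bit 0 survives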
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot add them directly, because expansion
+/// might result in multiple MBBs for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+ // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
+ }
+ }
+
+ return true;
+}
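+
+// Sketch of the case handled above (IR and block names are illustrative):
+//
+//   bb1:    br label %merge
+//   merge:  %p = phi i32 [ 7, %bb1 ], [ %v, %bb2 ]
+//
+// The constant 7 must be materialized into a vreg at the end of bb1's
+// machine block, and the (machine PHI operand, vreg) pair is queued in
+// FuncInfo.PHINodesToUpdate instead of being written immediately, since
+// bb1 may have been expanded into several machine basic blocks.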
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000..b46edad
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,483 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is a PHI
+/// node, is used by a PHI node, or is used outside of the basic block that
+/// defines it.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+ }
+ return false;
+}
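+
+// For example (illustrative IR):
+//
+//   entry:  %x = add i32 %a, %b
+//           br label %next
+//   next:   %y = mul i32 %x, 2
+//
+// %x is used outside 'entry', so isUsedOutsideOfDefiningBlock(%x) is true
+// and a virtual register will be allocated for it below.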
+
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
+ Outs, Fn->getContext());
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here whether this object is a
+ // suitable candidate, i.e., whether it would trigger the creation of a
+ // stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP, AI);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN->getDebugLoc();
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ // Mark landing pad blocks.
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
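+
+// For illustration, on a target where i64 is expanded into two i32 parts,
+// a value of type { i64, float } receives three consecutive vregs:
+//
+//   unsigned R = FuncInfo.CreateRegs(StructTy);
+//   // R+0, R+1: the two halves of the i64
+//   // R+2:      the float
+//
+// (FuncInfo and StructTy are hypothetical names for this sketch.)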
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return NULL;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return NULL;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
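+
+// E.g. widening 8-bit live-out info to 32 bits above: both masks are
+// zero-extended, so the new high bits are treated as unknown (set in
+// neither KnownZero nor KnownOne), and NumSignBits is conservatively
+// reset to 1.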
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when its "
+ "CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
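+
+// Worked example of the merge loop above, schematically at 8 bits: for
+//
+//   %p = phi i8 [ 1, %a ], [ 3, %b ]
+//
+// the first operand gives KnownOne = 00000001 and KnownZero = 11111110;
+// merging 3 intersects the masks, leaving bit 0 known-one, bits 7..2
+// known-zero, and bit 1 unknown.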
+
+/// setArgumentFrameIndex - Record the frame index for the byval
+/// argument. This overrides any previous frame index entry for this
+/// argument.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get the frame index for the byval argument.
+/// If the argument has no assigned frame index, 0 is returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ return 0;
+}
+
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI) {
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
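+
+// Example of a call that sets the flag (schematic IR):
+//
+//   declare i32 @printf(i8*, ...)
+//   %r = call i32 (i8*, ...)* @printf(i8* %fmt, double 1.0)
+//
+// The double argument to the variadic callee flips usesVAFloatArgument,
+// which in turn emits the undefined reference to _fltused on Windows.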
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<const GlobalVariable *> TyInfo;
+ unsigned N = I.getNumArgOperands();
+
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+/// AddLandingPadInfo - Extract the exception handling information from the
+/// landingpad instruction and add it to the specified machine module info.
+void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
+ MachineBasicBlock *MBB) {
+ MMI.addPersonality(MBB,
+ cast<Function>(I.getPersonalityFn()->stripPointerCasts()));
+
+ if (I.isCleanup())
+ MMI.addCleanup(MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MMI.addCatchTypeInfo(MBB,
+ dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalVariable*, 4> FilterList;
+ for (User::op_iterator
+ II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
+ FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+
+ MMI.addFilterTypeInfo(MBB, FilterList);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 0000000..3b1abd7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,982 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
+/// CountResults - Count the real results of a target node. Value results come
+/// first, followed by an optional chain and optional glue results (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
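+
+// For example, a node whose value list is (i32, i32, ch, glue) has two
+// real results: the trailing glue and chain values are skipped, so
+// CountResults(N) == 2.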
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
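+
+// Schematic operand layout for a call-like node and what gets counted,
+// assuming two explicit uses:
+//
+//   (arg0, arg1, Reg:physreg1, Reg:physreg2, regmask, ch, glue)
+//
+// countOperands returns 5 (chain and glue are dropped) and reports
+// NumImpUses == 3 for the trailing physreg and regmask operands.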
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of the node is a CopyToReg, return its destination register; return 0
+/// otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MIB.addReg(VRBase, RegState::Define);
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const MCInstrDesc &MCID = MIB->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
+ if (II) {
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
+ if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MIB->getNumOperands();
+ while (Idx > 0 &&
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding.
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MIB.addImm(C->getSExtValue());
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ MIB.addFPImm(F->getConstantFPValue());
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MIB.addReg(R->getReg(), getImplRegState(Imp));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MIB.addRegMask(RM->getRegMask());
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MIB.addMBB(BBNode->getBasicBlock());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MIB.addFrameIndex(FI->getIndex());
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getDataLayout()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, DebugLoc DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
+}
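+
+// For illustration with x86-style classes: if VReg lives in GR32 and SubIdx
+// is sub_8bit, constrainRegClass may shrink VReg to GR32_ABCD in place and
+// return it unchanged; if the constrained class would fall below MinRCSize,
+// a COPY into a fresh vreg of a suitable class is emitted instead.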
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register, COPY can target all legal register
+ // classes.
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
+
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ } else {
+ // VReg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ VReg = ConstrainForSubReg(VReg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
+
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
+ VRBase = MRI->createVirtualRegister(SRC);
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
+
+ // If creating a subreg_to_reg, then the first input operand is an
+ // immediate (the implicit value); otherwise it's a register.
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MIB.addImm(SD->getZExtValue());
+ } else
+ AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted.
+ AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
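+
+// Schematically (class names are illustrative):
+//
+//   %v = COPY_TO_REGCLASS %src, GR32_NOSPRegClassID
+//     ==>  %newvreg:GR32_NOSP = COPY %src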
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ }
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MIB);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
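+
+// The node carries the register-class id first, followed by
+// (value, subreg index) pairs, e.g. (ARM-style names, for illustration):
+//
+//   %dst = REG_SEQUENCE QPRRegClassID, %lo, dsub_0, %hi, dsub_1
+//
+// which assembles %lo and %hi into the dsub_0/dsub_1 lanes of a QPR vreg.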
+
+/// EmitDbgValue - Generate a machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ uint64_t Offset = SD->getOffset();
+ MDNode* MDPtr = SD->getMDPtr();
+ DebugLoc DL = SD->getDebugLoc();
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ unsigned FrameIx = SD->getFrameIx();
+ return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ MIB.addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
+ bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MIB);
+
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+ // Run post-isel target hook to adjust this instruction if needed.
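+ // In release builds the hook only runs for instructions flagged with
+ // hasPostISelHook. Debug builds call it unconditionally so the default
+ // implementation can assert on instructions that carry the flag without
+ // the target overriding AdjustInstrPostInstrSelection.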
+#ifdef NDEBUG
+ if (II.hasPostISelHook())
+#endif
+ TLI->AdjustInstrPostInstrSelection(MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+ // Remember the operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
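+ // Inline asm operands come in groups: one flags immediate followed by
+ // NumVals values. GroupIdx records where each flags operand landed so
+ // tied uses can locate their defs below.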
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg, RegState::Define |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
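+ // GroupIdx[N] is the index of group N's flags operand, so the +1 skips
+ // the flags immediate to reach the group's first register; defs and
+ // uses are then tied pairwise in order.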
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000..a9c2203
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,146 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgValue;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+ /// of node is a CopyToReg, return its destination register. Return 0
+ /// otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, DebugLoc DL);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional flag operands
+ /// (which do not go into the machine instrs.)
+ static unsigned CountResults(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..51cc254
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3924 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
+
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+
+ // Libcall insertion helpers.
+
+public:
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const;
+
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
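+ // Keep the walk iterator valid: if it points at the node being
+ // forgotten, step it off before the node leaves the list.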
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New);
+ ReplacedNode(Old);
+ }
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+ : SelectionDAG::DAGUpdateListener(dag),
+ TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag) {
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ DAG.AssignTopologicalOrder();
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
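+ // The node list is topologically ordered (operands precede users), so the
+ // backwards walk below visits each user before its operands, while those
+ // operands are still in their original form.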
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
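+ // Step down through the narrower simple FP types one at a time (the
+ // simple-type enumeration keeps the FP types adjacent), remembering the
+ // narrowest type the target can cheaply extend-load from.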
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
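+ // For example, an unaligned i32 store becomes two i16 truncating stores:
+ // Lo keeps the low half and Hi holds the value shifted right by 16.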
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getPointerInfo(), NewStoredVT,
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ ValResult = Load;
+ ChainResult = TF;
+ return;
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
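+ // For example, an unaligned i32 load is rebuilt from its two i16 halves
+ // as (Hi << 16) | Lo, with Lo always zero-extended.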
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ ValResult = Result;
+ ChainResult = TF;
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction, then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
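+ // StackPtr2 addresses the lane being replaced: the slot base plus the
+ // index scaled by the element size.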
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
+ false, false, 0);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers in which case the inserted value can be over width.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
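+ // The two 32-bit halves are laid out so the memory image matches a
+ // single 64-bit store: low word at Ptr, high word at Ptr+4, swapped
+ // for big-endian targets.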
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(0, 0);
+}
+
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ DebugLoc dl = Node->getDebugLoc();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
+ StVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ DebugLoc dl = Node->getDebugLoc();
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment) {
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
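+ // A SEXTLOAD cannot use the widened load directly: the extra stored bits
+ // are zeros rather than sign bits, so load with EXTLOAD here and repair
+ // with SIGN_EXTEND_INREG below.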
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment) {
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ }
+}
+
+/// LegalizeOp - Legalize the given operation and its operands, replacing the
+/// node in the DAG with a legal equivalent when necessary.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+  // Figure out the correct action; the way to query this varies by opcode.
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+      // Replace ISD::DEBUGTRAP with ISD::TRAP.
+      SDValue NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(),
+                                   Node->getVTList(), Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
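+  // At this point, Action says how to proceed: Legal keeps the node as-is,
+  // Custom defers to the target's LowerOperation hook, Expand open-codes the
+  // node here, and Promote retries the operation in a wider type.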
+ if (SimpleFinishLegalizing) {
+ SDNode *NewNode = Node;
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Node->getOperand(1).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(1));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Handle.getValue());
+ }
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Node->getOperand(2).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(2));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Node->getOperand(1),
+ Handle.getValue());
+ }
+ break;
+ }
+
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
+ switch (Action) {
+ case TargetLowering::Legal:
+ return;
+ case TargetLowering::Custom: {
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Res);
+ else
+ ResultVals.push_back(Res.getValue(i));
+ }
+ if (Res.getNode() != Node || Res.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data());
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+ }
+    // FALLTHROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Node);
+ return;
+ case TargetLowering::Promote:
+ PromoteNode(Node);
+ return;
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+    Node->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ break;
+ case ISD::LOAD: {
+ return LegalizeLoadOps(Node);
+ }
+ case ISD::STORE: {
+ return LegalizeStoreOps(Node);
+ }
+ }
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
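+  // e.g. extracting element 2 of a v4i32 loads from StackPtr + 2*4 bytes.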
+
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
+ false, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Store the vector to a temporary stack slot, store the inserted part over
+  // it, then load the updated vector back out.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+  // Store the subvector, chained after the whole-vector store so the two
+  // stores stay ordered before the final load.
+  Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
+                    MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ EltVT, false, false, 0));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+
+ // Get the sign bit of the RHS. First obtain a value that has the same
+ // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
+ SDValue SignBit;
+ EVT FloatVT = Tmp2.getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+ if (TLI.isTypeLegal(IVT)) {
+ // Convert to an integer with the same sign bit.
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ } else {
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getPointerTy();
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ // Then store the float to it.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ if (TLI.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+ } else { // Little endian
+ SDValue LoadPtr = StackPtr;
+ // The float may be wider than the integer we are going to load. Advance
+ // the pointer so that the loaded integer will contain the sign bit.
+ unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+ unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+ LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ // Load a legal integer containing the sign bit.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, false, 0);
+ // Move the sign bit to the top bit of the loaded integer.
+ unsigned BitShift = LoadTy.getSizeInBits() -
+ (FloatVT.getSizeInBits() - 8 * ByteOffset);
+ assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+ if (BitShift)
+ SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+ }
+ // Now get the sign bit proper, by seeing whether the value is negative.
+ SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
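+  // SignBit is now a boolean that is true iff Tmp2 is negative.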
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
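+  // e.g. for Align == 32 this rounds SP down via SP & -32 before the
+  // subtraction, so the allocation is 32-byte aligned.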
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+    // If this is a floating-point comparison, assign and break; otherwise
+    // fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
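+      // e.g. SETUGT expands to (LHS SETGT RHS) OR (LHS SETUO RHS), while
+      // SETOGT expands to (LHS SETGT RHS) AND (LHS SETO RHS).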
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+    // Fall through if this is an unsigned integer comparison.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // We only support using the inverted operation and not a
+ // different manner of supporting expanding these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If this isn't the ordered or unordered comparison itself,
+      // the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ EVT SlotVT,
+ EVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
+
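+  // For instance, an FP_ROUND expanded through the stack truncstores the wide
+  // value into a narrow slot and reloads it, while an FP_EXTEND stores the
+  // narrow value and extloads it at the wider type.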
+  // Emit a store to the stack slot. Use a truncstore if the input value is
+  // wider than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, SlotVT, false, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, false, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ Node->getValueType(0).getVectorElementType(),
+ false, false, 0);
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, false, 0);
+}
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
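+  // e.g. {X, undef, undef, undef} becomes SCALAR_TO_VECTOR(X), an
+  // all-constant vector becomes a constant-pool load, and a vector with at
+  // most two distinct values becomes a shuffle of two splats.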
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ SmallVector<Constant*, 16> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ }
+
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+/// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+/// does not fit into a register, return the lo part and set the hi part to the
+/// by-reg argument. If it does fit into a single register, return the result
+/// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+  // isTailCall may be true since the callee does not reference the caller's
+  // stack frame. Check that the call is in the right position to be emitted
+  // as a tail call.
+ SDValue TCChain = InChain;
+ bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
+/// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+/// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// useDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ // The other use might have been replaced with a divrem already.
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
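+  // e.g. when legalizing an SDIV, look for an SREM (or an already-formed
+  // SDIVREM) with the same operands.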
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically chain any previous call as a
+  // dependency.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+  // Also pass a pointer to a stack slot that receives the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
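+  // This matches the compiler-rt divmod signature, e.g.
+  // int __divmodsi4(int a, int b, int *rem): the quotient is returned by
+  // value and the remainder is written through the extra pointer argument.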
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// canCombineSinCosLibcall - Return true if sincos libcall is available and
+/// can be used to combine sin and cos.
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
+ const TargetMachine &TM) {
+ if (!isSinCosLibcallAvailable(Node, TLI))
+ return false;
+ // GNU sin/cos functions set errno while sincos does not. Therefore
+ // combining sin and cos is only safe if unsafe-fpmath is enabled.
+ bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU;
+ if (isGNU && !TM.Options.UnsafeFPMath)
+ return false;
+ return true;
+}
+
+/// useSinCos - Only issue sincos libcall if both sin and cos are
+/// needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
+/// pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically chain any previous call as a
+  // dependency.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+  // Pass a pointer to the stack slot that receives the sin result.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+  // Also pass a pointer to the stack slot that receives the cos result.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
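+  // This matches the sincos signature, e.g.
+  // void sincos(double x, double *sin, double *cos), which is why the call
+  // below is built with a void return type.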
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
+ MachinePointerInfo(), false, false, false, 0));
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
+ MachinePointerInfo(), false, false, false, 0));
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ EVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, MachinePointerInfo(),
+ false, false, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
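+    // The two stores build the IEEE-754 double 2^52 + (unsigned)value: the
+    // high word 0x43300000 is the exponent of 2^52 and the integer occupies
+    // the low 32 mantissa bits, so subtracting the bias below recovers the
+    // exact converted value.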
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, false, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
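+    // LoFlt == 2^52 + lo and HiFlt == 2^84 + hi*2^32, so after the biased
+    // subtraction the final FADD computes hi*2^32 + lo with a single,
+    // correct rounding.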
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
+ // Implementation of unsigned i64 to f32.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    // For unsigned conversions, convert them to signed conversions using the
+    // algorithm from the x86_64 __floatundisf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
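+      // Negative-looking inputs are halved with the lost low bit OR'd back
+      // in (round-to-odd), so doubling the converted result still rounds
+      // correctly.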
+
+      // TODO: This really should be implemented using a branch rather than a
+      // select. We happen to get lucky and machinesink does the right
+      // thing most of the time. This would be a good candidate for a
+      // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
+
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
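+    // For inputs >= 2^53 the f64 intermediate cannot represent all 64 bits,
+    // so any set low bits that would be lost are collapsed into bit 11 as a
+    // sticky bit, preventing double rounding in the final FP_ROUND.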
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ else {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
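+    // e.g. 0xAABBCCDD -> 0xDDCCBBAA via four shifts, two masks, and three
+    // ORs.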
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: llvm_unreachable("Cannot expand this yet!");
+ case ISD::CTPOP: {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
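+    // The multiply propagates the per-byte sums into the top byte, and the
+    // final shift extracts it as the total population count.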
+
+ return Op;
+ }
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
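+    // Every bit at or below the most significant set bit is now one, so
+    // popcounting the complement yields the number of leading zeros.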
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
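+  // Map the atomic opcode and memory width onto the matching __sync_*
+  // libcall, e.g. ATOMIC_LOAD_ADD on i32 becomes __sync_fetch_and_add_4.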
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
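+    // If the target has no special lowering, report round-to-nearest, the
+    // standard C FLT_ROUNDS value of 1.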
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_FENCE:
+ case ISD::MEMBARRIER: {
+    // If the target didn't lower this, lower it to a '__sync_synchronize()'
+    // call.
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
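+    // A compare-and-swap of 0 with 0 never modifies memory: it either finds
+    // 0 and writes 0 back, or fails, and in both cases it returns the
+    // current contents, which is exactly an atomic load.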
+ SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
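+    // The swap writes the value atomically; only the chain result is kept,
+    // and the old value returned by the swap is simply dropped.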
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, VT));
+ }
+ break;
+ }
+ case ISD::TRAP: {
+    // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
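+    // Sign extension from ExtraVT is a left shift that places its sign bit
+    // at the top, followed by an arithmetic shift back down: e.g. extending
+    // the low i8 of an i32, 0x000000FF << 24 == 0xFF000000, and an
+    // arithmetic >> 24 gives 0xFFFFFFFF (-1).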
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
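+    // Expand in terms of FP_TO_SINT: inputs below 2^(NVT bits - 1) convert
+    // directly, while larger inputs convert as (x - 2^(NVT bits - 1)) and
+    // then have the sign bit XOR'ed back in.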
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ APFloat apf(DAG.EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V),
+ false, false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
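+      // Round the va_list pointer up to a multiple of Align with the usual
+      // (Ptr + Align - 1) & ~(Align - 1) idiom; -(int64_t)Align is the
+      // two's complement form of that mask.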
+
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ TLI.getPointerTy()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getDataLayout()->
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, false, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+      // BUILD_VECTOR operands are allowed to be wider than the element type.
+      // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not
+      // accept it.
+ if (NewEltVT.bitsLT(EltVT)) {
+
+        // Convert the shuffle node: if the original node was v4i64 and the
+        // new EltVT is i32, cast the operands to v8i32 and rebuild the mask.
+
+        // Calculate the new VT; its total size must equal the original VT's.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+        // Cast the operands to the new VT.
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+        // Convert the shuffle mask.
+        unsigned factor =
+          NewVT.getVectorNumElements() / VT.getVectorNumElements();
+
+        // EltVT only gets smaller here, so the factor is always at least 1.
+        assert(factor > 0);
+
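+        // e.g. with factor == 2, the mask [1, -1] over v2i64 becomes
+        // [2, 3, -1, -1] over v4i32.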
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Op0,
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Op1,
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ bool isSIN = Node->getOpcode() == ISD::FSIN;
+ // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
+ // fcos which share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ canCombineSinCosLibcall(Node, TLI, TM))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (!isSIN)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ } else if (isSIN) {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ }
+ break;
+ }
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FP16_TO_FP32:
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ case ISD::FP32_TO_FP16:
+ Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionPointerRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
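+    // Negate the RHS via two's complement (flip every bit, then add one) so
+    // that a - b becomes a + (~b + 1).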
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
+ useDivRem(Node, isSigned, false))) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X - (X / Y) * Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ useDivRem(Node, isSigned, true)))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+    // See if the multiply can be lowered using a two-result operation. We
+    // just need the low half of the multiply; try both the signed and
+    // unsigned forms. If the target supports both SMUL_LOHI and UMUL_LOHI,
+    // form a preference by checking which forms of plain MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
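+    // e.g. INT_MAX + 1: both operands are non-negative but the sum is
+    // negative, so overflow is reported.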
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
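+    // Unsigned overflow can be read off the result alone: an ADD carried out
+    // iff the sum wrapped below LHS, and a SUB borrowed iff the difference
+    // wrapped above LHS.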
+    Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+                                   Node->getOpcode() == ISD::UADDO ?
+                                   ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+      // The high half of each argument is obtained by SRA'ing all but one of
+      // the bits of its own low half, i.e. by replicating the sign bit.
+      unsigned LoSize = VT.getSizeInBits();
+      SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+                                  DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+      SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+                                  DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+      // Here we're passing the 2 arguments explicitly as 4 arguments that are
+      // pre-lowered to the correct types. This all depends upon WideVT not
+      // being a legal type for the architecture, so each wide argument has to
+      // be split into two halves.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
+ }
+
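+    // Signed multiplication overflowed iff the top half is not the sign
+    // extension of the bottom half; unsigned multiplication overflowed iff
+    // the top half is nonzero.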
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
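+    // Assemble the pair as zext(Lo) | (anyext(Hi) << N/2), where N is the
+    // width of the pair type.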
+ EVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(PairTy)));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ EVT PTy = TLI.getPointerTy();
+
+ const DataLayout &TD = *TLI.getDataLayout();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
+ false, false, 0);
+ Addr = LD;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+    assert(TLI.isTypeLegal(VT.getScalarType()) &&
+           "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ break;
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ MVT OVT = Node->getSimpleValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
+ OVT = Node->getOperand(0).getSimpleValueType();
+ }
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // FIXME: This should set a bit in the zero extended value instead.
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
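+    // BSWAP on the wider type leaves the original bytes at the high end, so
+    // shift them back down: e.g. i16 0xAABB zero extended to i32 is
+    // 0x0000AABB, BSWAP gives 0xBBAA0000, and >> 16 yields 0xBBAA.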
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+  // Construct a legalizer and run it over the entire DAG.
+ SelectionDAGLegalize(*this).LegalizeDAG();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..de217d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1451 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
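+  // e.g. for f32 softened to i32 the mask is 0x7FFFFFFF, clearing only the
+  // sign bit.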
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
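+  // (for f32 softened to i32 this computes RHS & 0x80000000).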
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy(RVT)));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy(LVT)));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
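+
+// Using the fma libcall (e.g. "fmaf" for f32) rather than a softened multiply
+// followed by a softened add preserves FMA's single-rounding semantics, which
+// a mul+add pair would not.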
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
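+
+// Subtracting from -0.0 rather than +0.0 matters for signed zeros:
+// (-0.0) - (+0.0) is -0.0 and (-0.0) - (-0.0) is +0.0, both matching FNEG,
+// whereas (+0.0) - (+0.0) would incorrectly yield +0.0.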
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
+ L->getMemoryVT(), L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
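+
+// In the extending-load case above, the memory type (say f32) is loaded as-is
+// and the FP_EXTEND to the wider result type (say f64) is emitted as a new
+// node; that FP_EXTEND is itself softened later into a libcall such as
+// "__extendsfdf2".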
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ DebugLoc dl = N->getDebugLoc();
+
+  // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+  // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+  // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+    // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+                  : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
+}
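+
+// Worked example of the search above: for an i16 source and f32 result, the
+// loop tries each integer type from narrowest up; since RTLIB only defines
+// int-to-fp conversions for i32 and wider, it settles on NVT = i32, extends
+// the operand, and calls RTLIB::SINTTOFP_I32_F32 or UINTTOFP_I32_F32 (by
+// default "__floatsisf" / "__floatunsisf").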
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
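+
+// For illustration: with f32 operands a SETOLT comparison softens into a call
+// to a libcall such as "__ltsf2"; softenSetCCOperands then leaves the i32
+// call result in NewLHS with an integer condition code (here, "result < 0").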
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+}
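+
+// For example, a truncating f64 -> f32 store softens into an FP_ROUND (itself
+// lowered to a call such as "__truncdfsf2"), followed by an ordinary i32
+// store of the bit-converted result.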
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we
+/// just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[1])),
+ NVT);
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[0])),
+ NVT);
+}
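+
+// ppcf128 is a pair of doubles whose value is Hi + Lo. As the indexing above
+// shows, word 0 of the bitcast APInt holds the high-order double and word 1
+// the low-order one, so e.g. the constant 1.0 expands to Hi = 1.0, Lo = 0.0.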
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+}
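+
+// Extending f32 or f64 to ppcf128 is exact: the value fits entirely in the
+// high double, so the low half of the pair is simply +0.0.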
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ ArrayRef<uint64_t> Parts;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
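+
+// Decoding the magic constants above: each is the IEEE double bit pattern of
+// 2^N, e.g. 0x43f0000000000000 has biased exponent 0x43f = 1087 = 1023 + 64
+// and a zero mantissa, i.e. exactly 2^64. Adding 2^N to the signed result
+// corrects it when the unsigned input had its top bit set, and the SELECT_CC
+// picks that corrected value exactly when Src < 0 as a signed integer.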
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
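+
+// The dataflow above computes, in boolean form:
+//   result = (hi1 == hi2 && lo1 CC lo2) || (hi1 != hi2 && hi1 CC hi2)
+// i.e. the high doubles decide the comparison unless they are equal, in which
+// case the low doubles break the tie.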
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ false, dl);
+}
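+
+// The constant above is the double bit pattern of 2^31 (biased exponent
+// 0x41e = 1023 + 31, zero mantissa). Values below 2^31 convert directly via
+// FP_TO_SINT; values at or above it are shifted down by 2^31 first and the
+// 0x80000000 bias is added back in integer arithmetic.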
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..d19c13b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,3042 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+    // For example, i32 = BITCAST v2i16 on Alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ case TargetLowering::TypeWidenVector:
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
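+
+// The fallback above round-trips the value through the stack:
+// CreateStackStoreLoad stores InOp in its own type and reloads the slot as
+// OutVT, which the ANY_EXTEND then widens to the promoted integer type.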
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
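+
+// Example with i16 promoted to i32: an input whose low half is 0xABCD (high
+// promoted bits arbitrary) becomes 0xCDABxxxx after the i32 BSWAP; the
+// logical shift right by DiffBits = 16 then yields 0x0000CDAB, whose low 16
+// bits are the correct i16 byte swap.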
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+  // the result, for example i14 = BUILD_PAIR(i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+  // Zero extend things like i1, sign extend everything else. In theory it
+  // shouldn't matter which one we pick, but in practice this tends to give
+  // better code.
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+         "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
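+
+// Example: for i8 promoted to i32, the zero-extended operand has 24 extra
+// leading zero bits, so CTLZ in i32 over-counts by exactly
+// NVT.getSizeInBits() - OVT.getSizeInBits() = 24, which the SUB removes.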
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
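+
+// Example: for i8 promoted to i32, setting bit 8 guarantees that when the low
+// 8 bits are all zero the count stops at 8 (the correct CTTZ result for a
+// zero i8) instead of running into the undefined promoted high bits.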
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ return DAG.getNode(ISD::AssertZext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->isNonTemporal(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the arithmetic result - switch anything that used the old value
+ // to use the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
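+ // E.g. for an i8 add done in i32: the add overflowed i8 exactly when
+ // sign_extend_inreg(Res, i8) != Res.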
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ DebugLoc dl = N->getDebugLoc();
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ // Convert to the expected type.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+
+ // Truncate to NVT instead of VT.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
+
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred if the high part is non-zero.
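+ // E.g. for an i8 umulo done in i32: the product of two zero-extended i8
+ // values fits in 16 bits, and any set bit above bit 7 means the i8
+ // result wrapped.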
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred if the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
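+ // After the endianness fix-up above, Parts[0] always holds the least
+ // significant register, so each later part is shifted into place by its
+ // register offset and OR'd in.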
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP32:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
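+ // E.g. an i8 SETULT evaluated in i32 is correct with both operands zero
+ // extended, since zero extension preserves unsigned order (and equality).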
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+ N->getOrdering(), N->getSynchScope());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
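+ // E.g. BUILD_PAIR(lo:i32, hi:i32) -> i64 becomes zext(lo) | (hi << 32):
+ // the shift pushes any garbage in Hi's promoted high bits out of range,
+ // and the zero extension keeps Lo's high bits clean for the OR.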
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the element values. Their type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ?
+ OpTy.getScalarType() : OpTy);
+ Cond = PromoteTargetBoolean(Cond, SVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
+ isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc DL = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ // Emit this X << 1 as X+X.
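+ // ADDC computes Lo+Lo and produces a carry holding the bit that crosses
+ // into the high half; ADDE then folds that carry into Hi+Hi.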
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bits of the shift amount. If we
+/// can tell this, we know that the amount is >= NVTBits or < NVTBits (e.g.
+/// >= 32 or < 32 when expanding to 32-bit halves), without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
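+ // The amount bits at or above log2(NVTBits) decide whether the shift
+ // stays within one half. E.g. when expanding i64 into i32 halves,
+ // HighBitMask covers amount bits [ShBits-1:5], i.e. amounts >= 32.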
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Calculate (NVTBits-1)-x, e.g. 31-x for 32-bit halves. NVTBits-1 is used
+ // instead of NVTBits to avoid creating an undefined shift when x is zero.
+ // XOR computes the subtraction here because x is known to be smaller than
+ // NVTBits, so all of its set bits lie within the NVTBits-1 mask.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ // When shifting right, the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that cross between the halves: the
+ // full crossing shift of NVTBits-Amt would be undefined when Amt is zero,
+ // so first shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining NVTBits-1-Amt bits of the crossing shift.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
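+ // Nothing is known about the amount, so compute both the "short"
+ // (Amt < NVTBits) and "long" (Amt >= NVTBits) results and select
+ // between them with the isShort compare below.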
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ return;
+ }
+
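+ // No carry-producing operations are available, so compute the carry
+ // manually: an unsigned add wrapped iff the low-half sum compares below
+ // an addend, and an unsigned subtract borrowed iff LHS < RHS.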
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is an any-extension of the input (which degenerates to a
+ // copy when the types already match).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so it will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo; make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero; make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
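+ // Byte-swapping the whole value also swaps the two halves: the result's
+ // low part is the bswap of the input's high part and vice versa, hence
+ // the swapped operands below.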
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz(HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz(HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ MemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
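+ // E.g. a big-endian i48 expanded to i32 halves: load the first four
+ // bytes into Hi and the remaining two into Lo, then move the 16 excess
+ // bits from the bottom of Hi into the top of Lo and shift Hi down.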
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
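+ // AND, OR and XOR are bitwise, so there is no interaction between the two
+ // halves: each half of the result is simply the operation applied to the
+ // corresponding halves of the operands.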
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
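+ // General case: writing LHS = LH*2^n + LL and RHS = RH*2^n + RL, where n
+ // is the width of the half type, the low 2n bits of the product are
+ //   LL*RL + (LL*RH + LH*RL)*2^n
+ // so Lo is the low half of LL*RL and Hi is its high half plus the low
+ // halves of LL*RH and LH*RL; the LH*RH term only affects discarded bits.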
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
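+ // The xxx_PARTS nodes take both halves of the value plus the shift amount
+ // as operands and produce both halves of the result in a single node.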
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
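+ // The low half of the result is a plain truncate of the wide operand; the
+ // high half is a truncate of the operand shifted right by the width of
+ // the low half.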
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode() == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+
+ SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // Check for overflow by dividing the product back out; select a divisor
+ // of 1 when RHS is zero so the divide is always safe.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+ ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)),
+ Overflow);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
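+ // The overflow libcalls (e.g. compiler-rt's __mulo* routines) return the
+ // product and report overflow by storing a non-zero value through an extra
+ // pointer argument, so pass the address of a zero-initialized stack slot
+ // and test it after the call.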
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
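+ // Lower the atomic load as an atomic compare-and-swap with both the
+ // comparison value and the replacement value zero: memory is left
+ // unchanged either way, and the swap returns the loaded value atomically.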
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ N->getOperand(0),
+ N->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
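+ // The wide values are equal iff both pairs of halves are equal, i.e. iff
+ // (LHSLo ^ RHSLo) | (LHSHi ^ RHSHi) is zero.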
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, X < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes,
+ true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+ // If the low part is known false, return the high part.
+ // For LE / GE, if the high part is known false, ignore the low part.
+ // For LT / GT, if the high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+ // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+ // which is wider than the legal integer type on 8/16-bit targets. Just
+ // truncate the constant to a valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), isVolatile, isNonTemporal,
+ Alignment);
+ }
+
+ if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DstVT. Check that the mantissa
+ // size of DstVT is at least the number of bits in SrcVT minus one.
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
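+ // Bit patterns of the f32 fudge factors 2^32, 2^64 and 2^128, one per
+ // supported source width.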
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
+ false, false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ return Swap.getValue(1);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
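+ // Build the promoted result element by element: extract each element from
+ // the input vector, any-extend it to the promoted element type, and
+ // reassemble with BUILD_VECTOR.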
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element into the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
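+ // The shuffle mask does not depend on the element type, so reuse it as-is
+ // and rebuild the shuffle on the promoted operand vectors.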
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT InElemTy = OutVT.getVectorElementType();
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+ // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+ // element types. If this is the case then we need to expand the outgoing
+ // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElems = N->getNumOperands();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+ for (unsigned i = 0; i < NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ &NewOps[0], NewOps.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000..b6436bf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1145 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
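+ // Bitmask recording which maps contain this value; one bit per map so
+ // that multiple mappings can be detected and reported below.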
+ unsigned Mapped = 0;
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ }
+
+ // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ case TargetLowering::TypeLegal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+#endif
+
+ return Changed;
+}
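+
+// Worked example of the worklist discipline above (illustrative sketch): on a
+// target where i16 is illegal, legalizing
+//   t3 = add i16 t1, t2
+// requires t1 and t2 to be Processed first. The add is then popped with
+// NodeId == ReadyToProcess and its result is promoted, creating new nodes
+// that AnalyzeNewNode walks; finally each user of t3 has its NodeId count
+// decremented, joining the worklist once that count reaches zero.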
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
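+
+// RemapValue's recursion behaves like path compression in a union-find
+// structure: after following a chain A -> B -> C once, the map entry for A
+// is rewritten to point directly at C. A minimal standalone sketch of the
+// same idea (hypothetical helper, not part of this file):
+//
+//   SDValue ChaseMap(DenseMap<SDValue, SDValue> &Map, SDValue V) {
+//     DenseMap<SDValue, SDValue>::iterator I = Map.find(V);
+//     if (I == Map.end())
+//       return V;                            // V was never replaced.
+//     I->second = ChaseMap(Map, I->second);  // Compress the chain.
+//     return I->second;
+//   }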
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in
+      // which case this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+    // When recursively updating nodes with new nodes, it is possible to have
+ // new uses of From due to CSE. If this happens, replace the new uses of
+ // From with To.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+  // Note that in some cases vector operation operands may be wider than the
+  // vector element type. For example, a BUILD_VECTOR of type <1 x i1> may
+  // have a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
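+
+// For illustration: an f32 operand becomes (i32 (bitcast Op)) and an f64
+// operand becomes (i64 (bitcast Op)); the bit pattern is unchanged, only the
+// type is reinterpreted.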
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+}
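+
+// For example (sketch, with illustrative types), converting an i64 value to
+// f64 through memory emits roughly:
+//   Slot  = stack temporary aligned for both i64 and f64
+//   Store = (store Op:i64 to Slot)
+//   Res   = (f64 (load Slot)), chained on Store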
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is false, the node has legal result types but an
+/// illegal operand, and VT is the type of that illegal operand.
+/// If LegalizeResult is true, the node has an illegal result type, and VT
+/// is the type of that illegal result.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
+ return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+    // The target didn't want to custom widen the result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N->getOperand(ResNo));
+}
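+
+// For example (sketch), given N = MERGE_VALUES(a, b, c) and ResNo == 1, the
+// uses of N's results 0 and 2 are redirected to the operands a and c, and b
+// is returned for the caller to legalize as the value of result 1.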
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ } else {
+ unsigned NumElements = InVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ }
+}
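+
+// For example, splitting v8i32 yields LoVT = HiVT = v4i32. For a non-vector
+// type the result is whatever getTypeToTransformTo returns; on a typical
+// 32-bit target (illustrative), splitting i64 yields LoVT = HiVT = i32.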
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
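+
+// For example (sketch), if Pair is an i64 that expands to two i32's, this
+// emits:
+//   Lo = (i32 (extract_element Pair, 0))   // low half
+//   Hi = (i32 (extract_element Pair, 1))   // high half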
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
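+
+// For example (sketch), for a vector of i32 elements this computes the
+// address VecPtr + Index * 4:
+//   Index = DAG.getNode(ISD::MUL, dl, PtrVT, Index, DAG.getConstant(4, PtrVT));
+//   Addr  = DAG.getNode(ISD::ADD, dl, PtrVT, Index, VecPtr);
+// where PtrVT abbreviates TLI.getPointerTy() (the name is used here for
+// brevity only).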
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
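+
+// For example (sketch), joining two i32 halves into an i64 emits:
+//   (i64 (or (zero_extend Lo), (shl (any_extend Hi), 32)))
+// so the low 32 bits come from Lo and the high 32 bits from Hi.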
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ &Ops[0], NumOps, isSigned, dl);
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
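+
+// The extension opcode follows the target's boolean convention: ZERO_EXTEND
+// for ZeroOrOneBooleanContent, SIGN_EXTEND for
+// ZeroOrNegativeOneBooleanContent, and ANY_EXTEND for
+// UndefinedBooleanContent, where the upper bits carry no meaning.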
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
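+
+// For example (sketch), splitting an i64 value X in half emits:
+//   Lo = (i32 (truncate X))
+//   Hi = (i32 (truncate (srl X, 32)))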
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..54ea926
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,750 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared by the code that implements the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+  // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+  // about the state of each node. The possible states are listed in the enum
+  // below.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not before seen, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
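+
+  // Worked example (illustrative): a node with three operands starts out
+  // Unanalyzed. AnalyzeNewNode sets its NodeId to the number of operands that
+  // are not yet Processed, say 3. As each of those operands finishes, the
+  // count is decremented (3 -> 2 -> 1 -> 0); on reaching 0 (ReadyToProcess)
+  // the node is pushed onto the worklist, and once handled it is marked
+  // Processed.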
+private:
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded this map
+ /// indicates which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
+
+ /// SplitVectors - For nodes that need to be split this map indicates
+ /// which operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
+ /// node with the corresponding input operand, except for the result 'ResNo',
+ /// for which the corresponding input operand is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
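+
+  // For example (sketch), if an i16 value was promoted to i32:
+  //   SExtPromotedInteger(Op) = (i32 (sign_extend_inreg PromotedOp, i16))
+  //   ZExtPromotedInteger(Op) = PromotedOp with the top 16 bits cleared
+  // making the promoted value safe to use where the original i16 semantics
+  // matter (e.g. as a divide or compare operand).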
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Results.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BITCAST(SDNode *N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode *N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode *N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecRes_LOAD(SDNode *N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode *N);
+  SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode *N);
+  SDValue WidenVecRes_SELECT(SDNode *N);
+  SDValue WidenVecRes_SELECT_CC(SDNode *N);
+  SDValue WidenVecRes_SETCC(SDNode *N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode *N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode *N);
+  SDValue WidenVecOp_SETCC(SDNode *N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads that
+  /// load a vector with a resulting wider type. It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   LD: load to widen.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+  /// loads that load a vector with a resulting wider type. It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   LD: load to widen.
+  ///   ExtType: extension element type.
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores that
+  /// store a widened vector into non-widened memory. It takes:
+  ///   StChain: list of chains for the stores we have generated.
+  ///   ST: store of a widened value.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+  /// GenWidenVectorTruncStores - Helper function to generate a set of
+  /// truncating stores that store a widened vector into non-widened memory.
+  /// It takes:
+  ///   StChain: list of chains for the stores we have generated.
+  ///   ST: store of a widened value.
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+  /// ModifyToType - Modifies a vector input (widens or narrows) to a vector
+  /// of the given WidenVT. The input vector must have the same element type
+  /// as WidenVT.
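+  /// For example, a v2i32 input could be widened to a v4i32 WidenVT
+  /// (presumably by padding the extra lanes with undef) or a v8i32 input
+  /// narrowed down to it.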
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
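+  /// For example, splitting a v8i32 yields LoVT = HiVT = v4i32, while
+  /// expanding an i64 yields LoVT = HiVT = i32.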
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000..222d1c0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,525 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
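+// For example, when an i64 is expanded into two i32 halves, the Lo half
+// occupies bytes 0-3 and the Hi half bytes 4-7 on a little-endian machine,
+// while a big-endian machine stores the Hi half first.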
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
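+    // e.g. for i64 = BITCAST v1i64 on x86, NOutVT is i32, and the loop below
+    // looks for a legal <2 x i32> (halving the element type and doubling the
+    // element count as needed) from which the two halves can be extracted.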
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+      // If the element size is smaller than a byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getIntPtrConstant(i)));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(
+ *DAG.getContext(),
+ LHS.getValueType().getSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+  unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(
+      NOutVT.getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
+ false, false, MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
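+  // For example, on a target where i64 is expanded to two i32s, inserting
+  // into <2 x i64> becomes two i32 inserts into <4 x i32>, at indices 2*Idx
+  // and 2*Idx+1.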
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+ bool isNonTemporal = St->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH, CL, CH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ "Condition legalized before result?");
+ unsigned NumElements = Cond.getValueType().getVectorNumElements();
+ EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
+ CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(0));
+ CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(NumElements / 2));
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000..c6e066e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,770 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  // SINT_TO_FP or SRL on vectors isn't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+  // Implements expansion for SIGN_EXTEND_INREG using SHL and SRA.
+  SDValue ExpandSEXTINREG(SDValue Op);
+  // Implements vselect in terms of XOR, AND, OR when blend is not supported
+  // by the target.
+  SDValue ExpandVSELECT(SDValue Op);
+  SDValue ExpandSELECT(SDValue Op);
+  SDValue ExpandLoad(SDValue Op);
+  SDValue ExpandStore(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+ // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+ // operand to the next size up.
+ SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+    // Check if any of this node's values are vectors. We don't need to check
+    // the operands because we will check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+  // If this basic block has no vectors, there is no need to legalize them.
+ if (!HasVectors)
+ return false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+      if (TLI.isLoadExtLegal(ExtType, LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ case ISD::FMA:
+ case ISD::SIGN_EXTEND_INREG:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ case TargetLowering::Promote:
+ switch (Op.getOpcode()) {
+ default:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ Result = PromoteVectorOpINT_TO_FP(Op);
+ Changed = true;
+ break;
+ }
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::VSELECT)
+ Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::SELECT)
+ Result = ExpandSELECT(Op);
+ else if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::SETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = DAG.UnrollVectorOp(Op.getNode());
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ MVT VT = Op.getSimpleValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ EVT VT = Op.getOperand(0).getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+
+ // Normal getTypeToPromoteTo() doesn't work here, as that will promote
+ // by widening the vector w/ the same element width and twice the number
+ // of elements. We want the other way around, the same number of elements,
+ // each twice the width.
+ //
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
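+  // For example, a <4 x i16> input would be extended to <4 x i32> here,
+  // rather than widened to <8 x i16>.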
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
+ assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
+ Operands.size());
+}
+
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+    // When the elements in a vector are not byte-addressable, we cannot
+    // directly load each element by advancing a pointer, which can only
+    // address bytes. Instead, we load all the significant words, mask bits
+    // off, and concatenate them to form each element. Finally, the elements
+    // are extended to the destination scalar type to build the destination
+    // vector.
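+    // Illustration (assuming a 32-bit pointer type): a <4 x i3> load covers
+    // 12 bits of memory; the loop below pulls the underlying storage in with
+    // integer word loads, then each 3-bit lane is recovered with an SRL by
+    // its bit offset and an AND with the element-sized mask.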
+ EVT WideVT = TLI.getPointerTy();
+
+    assert(WideVT.isRound() &&
+           "Cannot handle the case where the widest integer type is not a "
+           "power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LoadVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(LoadBytes));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ SDValue Lo, Hi, ShAmt;
+
+ if (BitOffset < WideBits) {
+ ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+ }
+
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+        BitOffset -= WideBits;
+        if (BitOffset > 0) {
+          ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
+ TLI.getShiftAmountTy(WideVT));
+ Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ }
+ }
+
+ if (Hi.getNode())
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+ TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+ } else {
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &Vals[0], Vals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+  // The size in bits of each scalar element as stored in memory.
+  unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+  // Round non-power-of-two sizes up to the next power of two.
+  if (!isPowerOf2_32(ScalarSize))
+    ScalarSize = NextPowerOf2(ScalarSize);
+
+  // The stride between stored elements, in bytes.
+  unsigned Stride = ScalarSize / 8;
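+  // For example, elements stored as i24 in memory are rounded up to 32 bits,
+  // giving a 4-byte stride between the truncating stores generated below.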
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+  // Lower a select instruction where the condition is a scalar and the
+  // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR, AND, and OR. The selector bit is broadcast.
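+  // Concretely, the expansion builds a mask vector whose lanes are all ones
+  // when the condition is true and all zeros otherwise, then computes
+  // (Op1 & Mask) | (Op2 & ~Mask).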
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+  assert(VT.isVector() && !Mask.getValueType().isVector() &&
+         Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ unsigned NumElem = VT.getVectorNumElements();
+
+  // If we can't even use the basic vector operations AND, OR, and XOR, we
+  // will have to scalarize the op.
+  // Note that the operation may be 'promoted', meaning that it is
+  // 'bitcasted' to another type, which is also handled.
+  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = TLI.getSetCCResultType(VT);
+ assert(MaskTy.isVector() && "Invalid CC type");
+  assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() &&
+         "Invalid mask size");
+
+  // The size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+ DAG.getConstant(0, BitTy));
+
+  // Broadcast the mask so that every lane of the vector is all ones or
+  // all zeros.
+ SmallVector<SDValue, 8> Ops(NumElem, Mask);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
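+  // Per lane, sign extension from OrigBW to BW bits is (x << (BW - OrigBW))
+  // followed by an arithmetic shift right by the same amount; e.g. extending
+  // from i8 within i32 lanes computes (x << 24) >> 24.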
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+  // If we can't even use the basic vector operations AND, OR, and XOR, we
+  // will have to scalarize the op.
+  // Note that the operation may be 'promoted', meaning that it is
+  // 'bitcasted' to another type, which is also handled.
+  // This expansion also isn't safe when the boolean type is 0/1 rather than
+  // 0/-1, as we need an all-ones vector constant to mask with.
+  // FIXME: Sign extend 1 to all ones if that's legal on the target.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(true) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+  assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() &&
+         "Invalid mask size");
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+  uint64_t HWMask = (SVT.getSizeInBits() == 64) ? 0x00000000FFFFFFFF
+                                                : 0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+ // Two to the power of half-word-size.
+  SDValue TWOHW = DAG.getConstantFP(double(1ULL << (BW / 2)),
+                                    Op.getValueType());
+
+ // Clear upper part of LO, lower HI
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+
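+// Expand FNEG as (-0.0 - x) when vector FSUB is available; -0.0 rather than
+// +0.0 is used so that negating a zero yields a zero of the opposite sign.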
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
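+// Unroll a vector SETCC one lane at a time: each scalar SETCC result is
+// converted to an all-ones (true) or all-zeros (false) element via SELECT so
+// that the rebuilt vector matches vector-compare semantics.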
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
+    Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+                         DAG.getConstant(
+                             APInt::getAllOnesValue(EltVT.getSizeInBits()),
+                             EltVT),
+                         DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+} // end anonymous namespace
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..5ec8535
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2795 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
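+  // For example, a <1 x i8> BUILD_VECTOR may carry its element as an i32
+  // operand, which is truncated back to i8 here.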
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->isInvariant(), N->getOriginalAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+      // The vector value is 0 or all ones, but the scalar expects 0 or 1,
+      // so mask off all but the low bit.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+      // The vector value is 0 or 1, but the scalar expects 0 or all ones,
+      // so sign extend the low bit.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+
+ if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
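+  // A <1 x ty> shuffle has a single mask element: 0 selects the scalarized
+  // LHS and any other value selects the RHS.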
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+  // Vector and scalar types may use different boolean contents (e.g. a true
+  // vector element may be all ones while a true scalar is 1). Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ Res = ScalarizeVecOp_EXTEND(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+  assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SmallVector<SDValue, 1> Ops(1);
+ Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], 1);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
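+  // EXTRACT_VECTOR_ELT may legitimately return a type wider than the vector
+  // element type, so extend the scalarized value if necessary.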
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
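+    // E.g. v2i64 = BITCAST i128 on a 64-bit target: the expanded i64 halves
+    // bitcast directly to the two v1i64 result halves.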
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment =
+ TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ isInvariant, Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ }
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                           &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+  // The only possible illegal operand is the mask, since result type
+  // legalization would otherwise have handled this node already.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+  assert(Lo.getValueType() == Hi.getValueType() &&
+         "Lo and Hi have differing types");
+
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+ assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+ EVT Src0VT = Src0.getValueType();
+ EVT Src0EltTy = Src0VT.getVectorElementType();
+ EVT MaskEltTy = MaskVT.getVectorElementType();
+
+ EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+ EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+ EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+ EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+ SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+ SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+ SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+ SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+ SDValue LoMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+ SDValue HiMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+  // For example, i64 = BITCAST v4i16 on Alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType())), 0);
+ }
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Load back the required element.
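+  // An any-extending load is used because the extract's result type may be
+  // wider than the element type actually stored in the slot.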
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc DL = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ LoMemVT, isVol, isNT, Alignment);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ isVol, isNT, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVol, isNT, Alignment);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ isVol, isNT, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+  // The input operands must all have the same type, and we know the result
+  // type is valid. Convert this to a BUILD_VECTOR that extracts all the
+ // input elements.
+  // TODO: If the input elements are power-of-two vectors, we could convert
+  // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+ return PromoteTargetBoolean(Con, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N);
+ break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ unsigned Opcode = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+  //   take chunks of size NumElts from the beginning and add them to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
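+  // For example, widening a <3 x i32> SDIV to <4 x i32> must not execute a
+  // divide on the undefined fourth lane (it could trap), so the three real
+  // elements are processed in legal-sized pieces instead.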
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+  // Check to see if we have a single operation with the widened type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+      // Scalar type: create an INSERT_VECTOR_ELT of type NextVT.
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+      // Vector type: create a CONCAT_VECTORS of type NextVT.
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+  // Check to see if we have a single operation with the widened type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+  // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+  if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
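+    // For example, when a v2f32 result widens to v4f32 from a v2i32 input,
+    // the input is widened to v4i32 only if v4i32 is itself legal.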
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
+      // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine the new input vector type. It uses the same element type as
+ // the input (if it's a vector) or the input type itself as the element
+ // type, and is the same size as the type to widen to.
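+ // Illustrative example (assumed types): if WidenVT is v4i32 (WidenSize
+ // 128) and InOp is an i64 scalar, NewNumElts is 2 and NewInVT becomes
+ // v2i64, built below via BUILD_VECTOR with one undef; a v2f32 input
+ // would instead give NewInVT v4f32 via CONCAT_VECTORS.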
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector, padding the extra widened elements with undef.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
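+ // E.g. (illustrative): two v2i32 operands widened to v4i32 yield the
+ // mask {0, 1, 4, 5}: lane i takes element i of the first widened input
+ // and lane NumInElts+i takes element i of the second.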
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads.
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember that the multiple loads are independent and
+ // chain to that.
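+ // (A TokenFactor takes all the partial-load chains as operands and
+ // produces a single chain result, recording only that every partial load
+ // must complete before users of the merged chain may execute.)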
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+ if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "Cannot widen a non-vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely
+ // that we can fix the input to a legal type, so unroll the convert
+ // into some scalar code and create a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenation of vectors
+ // with a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value, but we only want to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a
+ // real concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+// TLI:     Target lowering used to determine legal types.
+// Width:   Width left to load/store.
+// WidenVT: The widened vector type to load to/store from.
+// Align:   If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we may load/store from.
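+// Illustrative example (assumed target): with WidenVT v4i32 and 96 bits
+// left to load, a target with legal v2i32 but no legal i64 gets v2i32 from
+// the vector loop below; a follow-up call with Width = 32 returns i32.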
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger legal vector type to load/store that has the
+ // same element type and whose width evenly divides the width of WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operations used to build the vector.
+// [Start,End): the range of loads to use.
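+// Illustrative walk-through (assumed inputs): for a 128-bit VecTy and
+// LdOps = {i64, i32, i32}, the i64 seeds a v2i64 via SCALAR_TO_VECTOR; at
+// the first i32 the value is bitcast to v4i32 and Idx rescales from 1 to
+// 2 (1 * 64 / 32), so the two i32s land in lanes 2 and 3 before the final
+// bitcast to VecTy.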
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust the insert index based on the new load element type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines the pieces into the
+ // widened vector type.
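+ // Illustrative example (assumed legal types): widening a v3i32 load to
+ // v4i32 (LdWidth 96) might issue a v2i32 load followed by an i32 load,
+ // with the pieces recombined by the concat/build logic below.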
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+ // Find the vector type that can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the whole vector with a single instruction.
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ }
+
+ // Load the vector using multiple loads, from the largest vector type
+ // down to scalar loads.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ SDValue L;
+ if (LdWidth < NewVTWidth) {
+ // The type we are currently using is too large; find a better size.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads contain vectors, build the result using CONCAT_VECTORS.
+ // All of the vector loads are a power of 2 in width, and the scalar loads
+ // can be combined to make a power-of-2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads into a vector.
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
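+ // E.g. (illustrative): an extending load of v2i8 -> v2i32 whose result
+ // widens to v4i32 emits two scalar i8 -> i32 extloads and fills the
+ // remaining two lanes with undef.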
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
+ LdEltVT, isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the
+ // same element type, or scalar stores.
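+ // Illustrative example (assumed legal types): storing v3i32 from a value
+ // widened to v4i32 might emit one v2i32 subvector store followed by one
+ // i32 element store.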
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore the index back to be relative to the original widened
+ // element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to truncate the
+ // vector and then store it. Instead, we extract each element and store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widened vector type is bigger than the
+ // type we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we cannot play the trick of chopping the value
+ // into legal vector types and bitcasting to the right type. Instead, we
+ // unroll the store.
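+ // E.g. (illustrative): a truncating store of a v2i32 value that was
+ // widened to v4i32, with memory type v2i8, unrolls into two scalar
+ // i32 -> i8 truncating stores.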
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
+ isVolatile, isNonTemporal, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ // Extract element i so each truncating store writes its own lane.
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need to be narrowed.
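+ // E.g. (illustrative): with NVT v4i32, a v2i32 input is concatenated
+ // with one undef v2i32; inputs with more elements are narrowed via
+ // EXTRACT_SUBVECTOR or the extract/build fallback at the end.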
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 0000000..473e138
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,655 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If the value comes from a CopyFromReg, it is probably
+ // live-in from outside this BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ MVT VT = SU->getNode()->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates the change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns a single number reflecting the benefit of scheduling SU
+/// in the current cycle.
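+/// Roughly (illustrative summary, not a formal spec): a forced-priority
+/// bonus, plus the node's height (critical path) scaled, plus a
+/// blocked-node bonus in the default mode, doubled when resources are
+/// available, minus a scaled reg-pressure penalty, plus opcode-specific
+/// bonuses for calls, copies, and inline asm.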
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling: a small but very parallel
+ // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic: greedy and
+ // critical-path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform-specific things. They will need to move into the
+ // back end and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // The heuristic is simple: a node with no data successors reduces
+ // the number of live ranges; all others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
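+ // Data successors widen the region while data predecessors narrow it; a
+ // high balance later steers SUSchedulingCost toward its pressure-aware path.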
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I) {
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 0000000..4af7172
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, // value is the result of an expression
+ CONST = 1, // value is a constant
+ FRAMEIX = 2 // value is contents of a stack location
+ };
+private:
+ enum DbgValueKind kind;
+ union {
+ struct {
+ SDNode *Node; // valid for expressions
+ unsigned ResNo; // valid for expressions
+ } s;
+ const Value *Const; // valid for constants
+ unsigned FrameIx; // valid for stack objects
+ } u;
+ MDNode *mdPtr;
+ uint64_t Offset;
+ DebugLoc DL;
+ unsigned Order;
+ bool Invalid;
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+ unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ // Constructor for frame indices.
+ SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = FRAMEIX;
+ u.FrameIx = FI;
+ }
+
+ // Returns the kind.
+ DbgValueKind getKind() { return kind; }
+
+ // Returns the MDNode pointer.
+ MDNode *getMDPtr() { return mdPtr; }
+
+ // Returns the SDNode* for a register ref
+ SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+
+ // Returns the ResNo for a register ref
+ unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+
+ // Returns the Value* for a constant
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
+
+ // Returns the FrameIx for a stack object
+ unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ // Returns the offset.
+ uint64_t getOffset() { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() { return DL; }
+
+ // Returns the SDNodeOrder. This is the order of the preceding node in the
+ // input.
+ unsigned getOrder() { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ // property. A SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 0000000..7e7b897
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,56 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode that roughly corresponds to the ordering of the original LLVM
+/// instruction. This is used when scheduling is turned off, because we forgo
+/// the normal scheduling algorithms and output the instructions according to
+/// this ordering.
+class SDNodeOrdering {
+ DenseMap<const SDNode*, unsigned> OrderMap;
+
+ void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+ SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+public:
+ SDNodeOrdering() {}
+
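+  // Record NewOrder for Node, keeping the smallest (earliest) order seen if
+  // the node has already been assigned one.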
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
+ }
+ void remove(const SDNode *Node) {
+ DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+ if (Itr != OrderMap.end())
+ OrderMap.erase(Itr);
+ }
+ void clear() {
+ OrderMap.clear();
+ }
+ unsigned getOrder(const SDNode *Node) {
+ return OrderMap[Node];
+ }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..d1f36cb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,799 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
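+// Both schedulers register themselves with the scheduler registry; they can
+// typically be selected via llc's -pre-RA-sched option, e.g.
+// -pre-RA-sched=fast or -pre-RA-sched=linearize.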
+
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
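+    // Note: this returns the most recently pushed unit, so the "queue"
+    // actually behaves as a LIFO stack.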
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
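+  // Nodes producing or consuming glue cannot be duplicated, so give up on
+  // those; an MVT::Other (chain) result suggests a folded load that may be
+  // unfolded instead.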
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = newSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = newSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
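+  // Implicit defs follow the explicit defs in the node's value list, so start
+  // counting past the explicit defs to find Reg's value index.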
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
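+  // The alias iterator is constructed with IncludeSelf=true, so Reg itself is
+  // checked along with every register that overlaps it.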
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI)) {
+ LRegs.push_back(*AI);
+ Added = true;
+ }
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
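+      // Each inline asm operand group begins with a flags word that encodes
+      // the operand kind and the number of register operands that follow it.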
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+        // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross copy register class is the same as RC, then it must be
+        // possible to copy the value directly. Do not try to duplicate the def.
+        // If the cross copy register class is not the same as RC, then it's
+        // possible to copy the value, but it requires cross register class
+        // copies and is expensive.
+        // If the cross copy register class is null, then it's not possible to
+        // copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - A non-scheduling scheduler; it simply linearizes the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
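+  // Operands are visited in reverse order so a glue operand (always the last
+  // operand) is seen first and ends up emitted immediately above N.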
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
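+  // A glue operand must be emitted immediately above its glued user, so fold
+  // each glue node's remaining degree into that user and leave the glue with
+  // a single count, released by the user alone.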
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..c009cfc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,3039 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<SUnit*> LiveRegGens;
+
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+ typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+ LRegsMapT LRegsMap;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits, NULL) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGRRList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+ void releaseInterferences(unsigned Reg = 0);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = newSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool forceUnitLatencies() const {
+ return !NeedLatency;
+ }
+};
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
+ MVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::Untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ NumLiveRegs = 0;
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+
+ AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!forceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+    // CapturePred and others may have left the node in the pending queue; avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
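+    // Since the walk climbs bottom-up along the chain, a CALLSEQ_END (call
+    // frame destroy) marks entry into a nested call sequence and the matching
+    // CALLSEQ_BEGIN (frame setup) marks the exit from it.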
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[I->getReg()] = SU;
+ }
+ }
+ }
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
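+  // The call sequence is tracked as one extra "virtual" register slot, placed
+  // just past the physical registers (see the resize in Schedule()).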
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ HazardRec->RecedeCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors, the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = SU->getHeight();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
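+ // Probe the hazard recognizer with an increasing stall count until SU can
+ // issue without a structural hazard. The count is negated because, for
+ // bottom-up scheduling, getHazardType expects stalls as negative cycles.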
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, -Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->scheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ // LiveRegDegs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
+ }
+ }
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
+ }
+ }
+
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[I->getReg()] = SU;
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
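+ // Replay the most recently scheduled nodes (up to the recognizer's
+ // maximum lookahead) into the freshly reset state, receding one cycle at
+ // a time to match each node's height.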
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
+/// including BtSU, in order to make it possible to schedule SU.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
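+ // A chain result (MVT::Other) suggests a folded load. Try to unfold it
+ // into a separate load and operation so that cloning the node below does
+ // not also duplicate the memory access.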
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ // Unfolding an x86 DEC64m operation results in a store, dec, and load,
+ // which can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (isOperandOf(I->getSUnit(), LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
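+ // Implicit defs are numbered after the explicit defs in the node's value
+ // list, so count forward from the number of explicit defs to the matching
+ // implicit def.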
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Add any "live" registers that would be clobbered by
+/// the specified register def of the specified SUnit to RegAdded and LRegs.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+ // Check if this alias of Reg is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
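+ // Inline asm operands come in groups: a flag word encoding the operand
+ // kind and register count, followed by that many register operands.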
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+ if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
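+ // Remove this entry in O(1) by swapping in the last element (unless it
+ // is already the last one).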
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU)
+ return CurSU;
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+ << TrySU->NodeNum << ")\n");
+ AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available.
+ if (!TrySU->isAvailable)
+ CurSU = AvailableQueue->pop();
+ else {
+ AvailableQueue->remove(TrySU);
+ CurSU = TrySU;
+ }
+ // Interferences has been mutated. We must break.
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try to
+ // duplicate the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value but it requires cross register class copies
+ // and it is expensive.
+ // If the cross copy register class is null, then it's not possible to
+ // copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While the Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back; otherwise schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order since this is a bottom-up schedule.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
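+// Common base for the priority comparators below. Derived comparators may
+// override isReady() to act as a ready filter; by default every node is
+// considered ready.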
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for the bottom-up register pressure
+/// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+/// src_ls_rr_sort - Priority function for the source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+/// hybrid_ls_rr_sort - Priority function for the hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+/// ilp_ls_rr_sort - Priority function for the ILP (instruction level
+/// parallelism) scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+ bool SrcOrder;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
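+ // The queue is unordered, so remove in O(1) by swapping with the back.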
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void scheduledNode(SUnit *SU);
+
+ void unscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
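+ // The queue is unordered; linearly scan for the best candidate under
+ // Picker, then remove it by swapping it with the back element.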
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ SU->dump(DAG);
+ }
+ }
+#endif
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number. The smaller
+/// the number, the higher the priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
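+ // Sethi-Ullman numbering: take the maximum over the data predecessors, and
+ // add one extra register for each additional predecessor that ties the
+ // current maximum.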
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors without lengthening
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+#endif
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution by this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ MVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = PN->getSimpleValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+ BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling both nodes would cause a pipeline stall, sort them
+  // according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
+    // If neither instruction stalls (!LStall && !RStall) and the
+    // HazardRecognizer is enabled, it is grouping instructions by cycle, so
+    // each node's height is already accounted for and only its depth matters.
+    // We also reach this point if both stall but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
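+
+// Worked example of the ordering above (numbers assumed): with CurCycle = 4,
+// let left have height 6 plus a VRegCycle-use penalty (LHeight = 7) and right
+// have height 3 with no penalty. Only left stalls (7 > 4), so the function
+// returns 1 and right is scheduled first. If both stalled, the node with the
+// smaller adjusted height would be preferred; with no stalls the comparison
+// falls through to depth and then to Latency.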
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ #ifndef NDEBUG
+ const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+  // the same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+  // Count how many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
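+
+// Example with assumed numbers: given ReadyDelay = 3 and CurCycle = 5, an SU
+// of height 9 that does not reduce register pressure is kept out of the ready
+// queue (9 > 5 + 3), while one of height 8 becomes available as long as the
+// hazard recognizer reports no hazard when probed 3 cycles ahead.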
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall)
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight();
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure && !SrcOrder)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successors' explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const uint16_t *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+  if (!ImpDefs && !RegMask)
+ return false;
+
+ for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
+ SI != SE; ++SI) {
+ SUnit *SuccSU = SI->getSUnit();
+ for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
+ PE = SuccSU->Preds.end(); PI != PE; ++PI) {
+ if (!PI->isAssignedRegDep())
+ continue;
+
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const uint16_t *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+      // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
+ }
+ }
+ }
+ }
+}
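+
+// A sketch of the motivating case (virtual registers hypothetical): on a
+// two-address target,
+//   %t1 = ADD %t0, %x    ; %t1 tied to %t0, so the ADD clobbers %t0
+//   %t2 = SUB %t0, %y    ; another reader of %t0
+// the artificial edge added above makes the SUB's SUnit a predecessor of the
+// ADD's, so the ADD is emitted after the SUB and %t0's value no longer needs
+// to be copied to survive until the SUB.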
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..b22440d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,914 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, which is a base class used
+// by the SDNode-based scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows a latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+    cl::desc("Roughly estimate the number of cycles that 'long latency' "
+             "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf), BB(0), DAG(0),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
+ DAG = dag;
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
+}
+
+/// newSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = newSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
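+
+// For instance (registers hypothetical): given a machine node that
+// implicitly defines EFLAGS,
+//   %t = ADD ...                ; implicit-def EFLAGS
+//   CopyToReg EFLAGS, %t(1)
+// the use is operand 2 of a CopyToReg of a physical register, so this helper
+// reports PhysReg = EFLAGS together with the copy cost of its minimal
+// register class.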
+
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
+ SmallVectorImpl<EVT> &VTs,
+ SDValue ExtraOper = SDValue()) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
+
+ SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
+}
+
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ SmallVector<EVT, 4> VTs;
+ for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ CloneNodeWithValues(N, DAG, VTs);
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+      // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ SDValue InGlue = SDValue(0, 0);
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+    // If AddGlue fails, we could leave an unused glue value dangling on a
+    // previously glued load; RemoveUnusedGlue below strips it off so it does
+    // not cause any problems.
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
+ }
+}
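+
+// Hedged sketch of the effect (offsets assumed): given two loads on the same
+// chain,
+//   %a = load [%base + 0]
+//   %b = load [%base + 8]
+// %a is morphed to produce an extra MVT::Glue result which %b takes as an
+// extra operand, so the pair is emitted adjacently and in increasing address
+// order, which tends to improve cache locality.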
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = newSUnit(NI);
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ computeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
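+
+// Illustration (shape assumed): a call lowered as a glue chain
+//   CopyToReg(arg) -glue-> CALL -glue-> CopyFromReg(ret)
+// is collapsed by the upward and downward scans above into one SUnit marked
+// isCall; the CallSUnits loop then marks the node computing each CopyToReg
+// argument source as isCallOp.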
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = forceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+        if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
+ if (!isChain && !UnitLatencies) {
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
+ }
+
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection DAG that is
+/// given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getSimpleValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
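+
+// Worked example (itinerary values assumed): if getOperandLatency reports 4
+// and Use is a CopyToReg of a virtual register in a block with successors,
+// the edge latency is reduced to 3 on the assumption that this live-out copy
+// will be coalesced away and should not penalize the def.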
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+namespace {
+ struct OrderSorter {
+ bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+ const std::pair<unsigned, MachineInstr*> &B) {
+ return A.first < B.first;
+ }
+ };
+}
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+  // order number right after N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ GluedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+ Seen);
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
+ while (DI != DE) {
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
+ ++DI;
+ }
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+ }
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000..2ff37e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,185 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ MachineBasicBlock *BB;
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+    /// newSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *newSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+    /// are given as input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// flagged together nodes with a single SUnit.
+ void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ void dumpSchedule() const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual std::string getDAGName() const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+    /// RegDefIter - In-place iteration over the values defined by an SUnit.
+    /// It does not require iterator copies or other STL idioms; the iterator
+    /// constructs itself rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ MVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ MVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
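+
+    // Typical usage (illustrative sketch): iterate the values an SUnit
+    // defines without materializing a container, e.g. to count them:
+    //   unsigned NumDefs = 0;
+    //   for (RegDefIter I(SU, this); I.IsValid(); I.Advance())
+    //     ++NumDefs;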
+
+ protected:
+    /// forceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 0000000..58aa1fe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,278 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGVLIW() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, *I);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+  // Add all leaves to the AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+  // While AvailableQueue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back; otherwise schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+      // Otherwise, we have no instructions to issue, and pending instructions
+      // will fault unless we emit an explicit noop; this is the case on
+      // processors without pipeline interlocks, among others.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
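+
+// Rough cycle-by-cycle sketch of the loop above on a hypothetical two-node
+// DAG (SU0 with latency 2, SU1 depending on it): SU0 issues at cycle 0 and
+// releaseSuccessors() parks SU1 in PendingQueue with a depth bound of 2; at
+// cycle 1 AvailableQueue is empty, so the DFA state is reset and the cycle
+// advances; at cycle 2 SU1's depth matches CurCycle, it moves to
+// AvailableQueue, and it issues.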
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..6424431
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6382 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeDbgValue.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
+ SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
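+
+// Worked example of the element-size check above: after legalization, a v8i8
+// all-ones vector may be built from operands promoted to i32 holding
+// 0x000000FF, so countTrailingOnes() is compared against EltSize (8) rather
+// than requiring an all-ones i32 constant.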
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements.
+ SDValue Zero = N->getOperand(i);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (!CN->isNullValue())
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (!CFPN->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one 0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+/// allOperandsUndef - Return true if the node has at least one operand
+/// and all operands of the specified node are ISD::UNDEF.
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
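+
+// For instance, ISD::SETUGT (U and G bits) becomes ISD::SETULT: the
+// (Operation & ~6) mask carries the U and E bits through while the G bit
+// moves into the L position and vice versa.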
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
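+
+// E.g. for integers, !(X < Y) is (X >= Y): SETLT ^ 7 flips the L, G, and E
+// bits, yielding SETGE.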
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
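+
+// E.g. (X < Y) | (X == Y) folds to (X <= Y): ORing SETLT and SETEQ combines
+// the L and E bits, yielding SETLE.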
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns zero if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
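+
+// E.g. (X <= Y) & (X != Y) folds to (X < Y): ANDing SETLE and SETNE leaves
+// only the L bit set, yielding SETLT.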
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal, bool isInvariant) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
+}
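+
+// Resulting bit layout (low to high): ConvType in bits 0-1, the indexed mode
+// in bits 2-4, then one bit each for volatile (5), non-temporal (6), and
+// invariant (7). For example, a volatile unindexed sextload encodes as
+// 2 | (0 << 2) | (1 << 5) == 34, assuming SEXTLOAD == 2 and UNINDEXED == 0.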
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+      // Now that we removed this operand, check whether it has any uses left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N) {
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
+ RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ Ordering->remove(N);
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
+ for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+ DbgVals[i]->setIsInvalidated();
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE maps that
+/// correspond to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing);
+
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return TLI.getDataLayout()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),
+ getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
+ AllNodes.push_back(&EntryNode);
+ Ordering = new SDNodeOrdering();
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
+ MF = &mf;
+ TTI = tti;
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
+ allnodes_clear();
+ delete Ordering;
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+ Ordering->clear();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
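+
+// E.g. for an i32 Op and VT of MVT::i16, BitWidth is 32 and Imm is 0xFFFF,
+// so the zero-extension in register reduces to (AND Op, 0xFFFF).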
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ const ConstantInt *Elt = &Val;
+
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(Elt);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else if (EltVT==MVT::f64)
+ return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
+ EltVT==MVT::f16) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
+ EVT VT, int64_t Offset,
+ bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+ if (BitWidth < 64)
+ Offset = SignExtend64(Offset, BitWidth);
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
+ Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->addSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - Swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and all indices that
+// point at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
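+
+// A worked example of the remapping above (illustrative, not part of the
+// patch): with NElts == 4 and M = <0, 5, -1, 6>, swapping N1/N2 remaps the
+// mask to <4, 1, -1, 2>, so every lane still reads the same source element
+// after the operands trade places; negative (undef) entries are untouched.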
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize a shuffle with all indices into the LHS -> shuffle lhs, undef.
+  // Canonicalize a shuffle with all indices into the RHS -> shuffle rhs, undef.
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+  // If this is an identity shuffle, or if all elements are undef, return the
+  // existing operand or an UNDEF node directly.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+  // Allocate the mask array for the node out of the BumpPtrAllocator, since
+  // SDNode doesn't have access to it. This memory will be "leaked" when the
+  // node is deallocated, but recovered when the OperandAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N =
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
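+
+// Hedged usage sketch (caller-side, names assumed rather than taken from
+// this patch): an identity mask over same-sized vectors never allocates a
+// node, because of the canonicalizations above.
+//   int Mask[4] = {0, 1, 2, 3};
+//   SDValue S = DAG.getVectorShuffle(VT, dl, V1, V2, Mask); // returns V1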
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+ Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddPointer(Label);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(BA);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+/// getShiftAmountOperand - Return the specified value casted to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
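+
+// Illustrative behaviour (types assumed): if the target reports an i8 shift
+// amount type and Op is i32, getShiftAmountOperand truncates the amount to
+// i8; an i1 amount would instead be zero-extended. Vector amounts are
+// returned unchanged by the early exit above.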
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const DataLayout *TD = TLI.getDataLayout();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
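+
+// Hedged sketch of the integer constant path above (values assumed):
+//   SDValue Two  = DAG.getConstant(2, MVT::i32);
+//   SDValue Four = DAG.getConstant(4, MVT::i32);
+//   // FoldSetCC(MVT::i1, Two, Four, ISD::SETULT, dl) == getConstant(1, i1)
+// A constant-vs-unknown FP comparison is not folded here; it is re-queued
+// through getSetCC with swapped operands so the constant lands on the RHS.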
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
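+
+// Worked example (illustrative): if ComputeMaskedBits proves the low four
+// bits of Op are zero (KnownZero ends in 1111), then
+// MaskedValueIsZero(Op, APInt(BitWidth, 0xF)) returns true, because
+// (KnownZero & 0xF) == 0xF.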
+
+/// ComputeMaskedBits - Determine which bits of Op are known to be either
+/// zero or one and return them in the KnownZero/KnownOne bitsets. The
+/// recursion is depth-limited in order to short-circuit processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+    // Output known-0 bits are clear if they are zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+    // Output known-1 bits are set if they are set in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if they are set in exactly one of LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for the high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ return;
+ }
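+  // Illustrative arithmetic for the MUL case above (values assumed): if one
+  // operand has 2 known trailing zero bits and the other has 3, the product
+  // has at least 5, e.g. a 4-byte-aligned base times an 8-aligned stride is
+  // 32-aligned -- exactly the alignment computation mentioned above.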
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit = InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clearAllBits();
+ return;
+ }
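+  // Sanity check of the bound above (illustrative): for BitWidth == 32,
+  // LowBits = Log2_32(32)+1 = 6, and every possible count result (0..32)
+  // indeed fits in 6 bits, so the top 26 bits are known zero.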
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X has fewer bits
+      // than C (i.e. no wrap-around can happen). For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth here, since CLHS is non-negative.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD:
+ case ISD::ADDE: {
+    // The low known-0 bits of the output are the low clear bits common to
+    // both the LHS and RHS. For example, 8+(X<<3) is known to have the
+    // low 3 bits clear.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+    // With ADDE, a carry bit may be added in, so we can only use this
+    // information if we know (at least) that the low two bits are clear.
+    // We then report to the caller that the low bit is unknown but that
+    // the other low bits are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+ return;
+ }
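+  // Concrete instance of the ADD rule above (illustrative): in 8+(X<<3),
+  // both operands have at least 3 low known-zero bits, and no carry can
+  // enter those bits, so the sum keeps its low 3 bits clear. For ADDE the
+  // unknown incoming carry can flip bit 0, hence the getBitsSet form that
+  // starts at bit 1.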
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ return;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
+ default:
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ return;
+ }
+}
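+
+// Hedged caller-side sketch (names assumed): a typical query derives a
+// provable pointer alignment from the trailing known-zero bits:
+//   APInt KnownZero, KnownOne;
+//   DAG.ComputeMaskedBits(Ptr, KnownZero, KnownOne);
+//   unsigned Align = 1u << KnownZero.countTrailingOnes();
+// which is how InferPtrAlignment-style consumers of this analysis work.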
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+    // Handle LOADX separately here. The EXTLOAD case will fall through.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+      case ISD::SEXTLOAD:    // e.g. sextload i16 -> i32 gives '17' bits known.
+        Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+        return VTBits-Tmp+1;
+      case ISD::ZEXTLOAD:    // e.g. zextload i16 -> i32 gives '16' bits known.
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+
+ APInt Mask;
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
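+
+// Illustrative check against the function comment above: for an i32 X with
+// only 1 known sign bit, "SRA X, 2" hits the ISD::SRA case, which adds the
+// shift amount, so ComputeNumSignBits returns 1 + 2 = 3 -- the top 3 bits
+// all equal the sign bit.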
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
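+
+// Worked example (illustrative): for Op = (or (shl X, 4), 12), the mask 12
+// only covers bits that (shl X, 4) is known to have clear, so
+// MaskedValueIsZero succeeds and the OR is equivalent to
+// (add (shl X, 4), 12) for base+offset purposes.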
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+  // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+    // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return Operand.getNode()->getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
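+
+// Hedged sketch of the unary constant folding above (values assumed):
+//   SDValue C = DAG.getConstant(0x12345678, MVT::i32);
+//   SDValue S = DAG.getNode(ISD::BSWAP, dl, MVT::i32, C);
+// never materializes a BSWAP node; the ConstantSDNode path returns
+// getConstant(0x78563412, MVT::i32) directly.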
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
+
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
+ }
+
+  // We have a number of constant values; constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
+}
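+
+// Illustrative sketch (operands assumed): given two v4i32 BUILD_VECTORs of
+// constants, FoldConstantArithmetic(ISD::ADD, VT, BV1, BV2) folds lane by
+// lane and rebuilds one BUILD_VECTOR of the four sums; any non-constant or
+// implicitly truncating lane makes it bail out with an empty SDValue, per
+// the checks above.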
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // Concat of UNDEFs is UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF &&
+ N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FMUL) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+ SDValue V = N2;
+
+ // If the first operand isn't the constant, try the second
+ if (!CFP) {
+ CFP = dyn_cast<ConstantFPSDNode>(N2);
+ V = N1;
+ }
+
+ if (CFP) {
+ // 0*x --> 0
+ if (CFP->isZero())
+        return SDValue(CFP, 0);
+ // 1*x --> x
+ if (CFP->isExactlyValue(1.0))
+ return V;
+ }
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators' return type must be the same as their first arg");
+    assert(VT.isInteger() && N2.getValueType().isInteger() &&
+           "Shifts only work on integers");
+    assert((!VT.isVector() || VT == N2.getValueType()) &&
+           "Vector shift amounts must have the same type as their first arg");
+    // Verify that the shift amount VT is big enough to hold valid shift
+    // amounts. This catches things like trying to shift an i1024 value by an
+    // i8, which is easy to fall into in generic code that uses
+    // TLI.getShiftAmount().
+    assert(N2.getValueType().getSizeInBits() >=
+           Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+           "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ (void)EVT;
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+
+ if (VT != Elt.getValueType())
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element; if the
+      // indices are known to differ, extract the element from the original
+      // vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
+ break;
+ }
+ }
+
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative.
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp && s != APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM:
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp && s != APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all-zero vector, so just
+        // return the other operand.
+        return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all-zero vector, so just
+      // return the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+      // For vectors, we can't easily build an all-ones vector, so just
+      // return the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
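+
+// Illustrative caller sketch (hypothetical values; DL, X and DAG are assumed
+// to be in scope) showing the identity folds above in action:
+//
+//   SDValue Zero = DAG.getConstant(0, MVT::i32);
+//   SDValue Sum  = DAG.getNode(ISD::ADD, DL, MVT::i32, X, Zero);
+//   // Sum == X: the (X + 0) fold fires and no ADD node is created.
+//
+// Repeating an identical non-folding call returns the CSE-mapped node, so
+// callers may invoke getNode freely without creating duplicates.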
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTORS with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
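+
+// Illustrative sketch of the SELECT fold above (hypothetical operands):
+//
+//   SDValue Cond = DAG.getConstant(1, MVT::i1);
+//   SDValue Sel  = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, A, B);
+//   // Sel == A: a constant-true condition folds to the true operand
+//   // without materializing a SELECT node.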
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
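+
+// Illustrative use (a sketch of what a target's LowerCall hook might do to
+// force incoming stack arguments to be reloaded before a call sequence can
+// clobber them):
+//
+//   Chain = DAG.getStackArgumentTokenFactor(Chain);
+//
+// Every load from a fixed (negative-index) stack slot hanging off the entry
+// node is then chained behind the returned TokenFactor.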
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Value.getOpcode() != ISD::UNDEF);
+
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
+ }
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ }
+
+ return Value;
+}
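+
+// Worked example for the multiplication trick above: splatting the memset
+// byte 0xAB into an i32 computes
+//
+//   0x000000AB * 0x01010101 = 0xABABABAB
+//
+// i.e. one zero-extend plus one multiply replaces an explicit shift/or chain.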
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI, StringRef Str) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64)
+ return DAG.getConstantFP(0.0, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
+ APInt Val(NumVTBits, 0);
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ }
+
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
+}
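+
+// Worked example: materializing the first two bytes of the string "ab" into
+// an i16 on a little-endian target gives
+//
+//   Val = 0x61 | (0x62 << 8) = 0x6261
+//
+// ('a' == 0x61, 'b' == 0x62); a big-endian target would produce 0x6162.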
+
+/// getMemBasePlusOffset - Returns an ADD of the given base pointer and a
+/// constant offset, addressing memory at Base + Offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
+}
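+
+// Illustrative sketch: for a source operand of the form
+//
+//   (add (GlobalAddress @str), (Constant 3))
+//
+// where @str is a constant string, this returns true with Str set to the
+// string contents starting at byte offset 3.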
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Returns true if the number of memory ops
+/// is below the threshold. The types of the chosen sequence of memory ops
+/// are returned by reference in MemOps.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ DAG.getMachineFunction());
+
+ if (VT == MVT::Other) {
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
+ TLI.allowsUnalignedMemoryAccesses(VT)) {
+ VT = TLI.getPointerTy();
+ } else {
+ switch (DstAlign & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+      // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing one (or a pair of) unaligned and overlapping load / store ops.
+      // FIXME: Only do this for 64-bit or wider types, since we don't have a
+      // proper cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
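+
+// Worked example (assuming i64/i32/i16/i8 are legal and safe to store): for
+// Size == 15 with no overlap allowed, the loop above emits
+// MemOps = { i64, i32, i16, i8 } -- four operations. With AllowOverlap and
+// fast unaligned accesses, the left-over 7 bytes are instead covered by a
+// second i64 that overlaps the first store by one byte, giving
+// MemOps = { i64, i64 } -- two operations.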
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+  // Turn a memcpy of undef into a nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+  // TODO: In the AlwaysInline case, if the size is big then generate a loop
+  // rather than a potentially enormous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize =
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ StringRef Str;
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign),
+ false, false, CopyFromStr, true, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ }
+
+ if (!Store.getNode()) {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+      // FIXME: does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
+ MinAlign(SrcAlign, SrcOff));
+ Store = DAG.getTruncStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
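+
+// Continuing the 15-byte example with MemOps = { i64, i64 }: the second
+// iteration hits the VTSize > Size case above, so SrcOff and DstOff are
+// pulled back from 8 to 7 and the final 8-byte load/store pair covers bytes
+// 7..14, overlapping the first pair by one byte.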
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+  // Turn a memmove of undef into a nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, false, SrcAlign);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
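+
+// Note on the structure above: unlike the memcpy expansion, *all* loads are
+// issued (and token-factored together) before any store is emitted. That is
+// what makes this expansion correct for memmove, where the source and
+// destination ranges may overlap: every source byte is read before the
+// destination is written. A sketch for an overlapping 8-byte move done as
+// two i32 ops:
+//
+//   memmove(p, p + 4, 8):  L0 = load p+4,  L1 = load p+8   (all loads first)
+//                          store L0 -> p,  store L1 -> p+4 (then all stores)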
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+  // Turn a memset of undef into a nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZeroVal =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ true, IsZeroVal, false, true, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
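+
+// Worked example (assuming MemOps == { i32, i16 } for a 6-byte memset of
+// 0xAB): LargestVT is i32, so MemSetValue is 0xABABABAB. The second store is
+// i16; since an i32->i16 truncate is free on many targets, it reuses the
+// splat as TRUNCATE(0xABABABAB) == 0xABAB instead of recomputing it.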
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
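+
+// Illustrative caller sketch (hypothetical operands): expanding a 16-byte,
+// 4-aligned, non-volatile memcpy might look like
+//
+//   SDValue Sixteen = DAG.getConstant(16, TLI.getPointerTy());
+//   Chain = DAG.getMemcpy(Chain, dl, Dst, Src, Sixteen, /*Align=*/4,
+//                         /*isVol=*/false, /*AlwaysInline=*/false,
+//                         DstPtrInfo, SrcPtrInfo);
+//
+// which tries inline loads/stores first, then target code, then libc.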
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+
+  // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE.
+ // For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ EVT VT = Cmp.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Cmp, Swp, MMO, Ordering,
+ SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
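+
+// Illustrative sketch (hypothetical operands): a cmpxchg is built as
+//
+//   SDValue CAS = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32, Chain,
+//                               Ptr, Cmp, Swp, MMO, Monotonic, CrossThread);
+//
+// yielding two results: CAS.getValue(0) is the loaded value and
+// CAS.getValue(1) is the output chain.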
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Val, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, MMO, Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, PtrInfo, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ Opcode == ISD::LIFETIME_START ||
+ Opcode == ISD::LIFETIME_END ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
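+
+// Illustrative sketch: for Ptr = (add FrameIndex<1>, Constant<8>) and
+// Offset == 4, this returns MachinePointerInfo::getFixedStack(1, 12),
+// i.e. the frame-index base with the two constant offsets combined.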
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo, Ranges);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
+ TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
+}
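+
+// Illustrative sketch (hypothetical operands): a target's address-mode
+// optimization might turn a plain load plus a pointer increment into one
+// pre-indexed load:
+//
+//   SDValue PreInc = DAG.getIndexedLoad(Load, dl, BasePtr, Inc, ISD::PRE_INC);
+//
+// The written-back address (BasePtr + Inc) is produced through the extra
+// pointer result of the indexed load's value list.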
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ false, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT, bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ true, SVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
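+
+// Illustrative sketch (not part of this patch): a truncating store is the
+// mirror image, writing only the low bits of a wide value (names assumed):
+//
+//   SDValue S = DAG.getTruncStore(Chain, DL, Val32, Ptr,
+//                                 MachinePointerInfo(), MVT::i8,
+//                                 false, false, 0);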
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(),
+ ST->getMemoryVT(),
+ ST->getMemOperand());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
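+
+// Illustrative sketch (not part of this patch): the SELECT_CC assertions
+// above reflect the operand order (LHS, RHS, TrueV, FalseV, CondCode):
+//
+//   SDValue Ops[] = { LHS, RHS, TrueV, FalseV,
+//                     DAG.getCondCode(ISD::SETLT) };
+//   SDValue Sel = DAG.getNode(ISD::SELECT_CC, DL, VT, Ops, 5);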
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ ArrayRef<EVT> ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+ // Memoize the node unless it produces glue.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: llvm_unreachable("Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
+ return *I;
+ }
+
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead, it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
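+
+// Illustrative sketch (not part of this patch): because UpdateNodeOperands
+// may CSE into a pre-existing node, callers must use the returned pointer
+// instead of assuming N was mutated in place. A hypothetical caller:
+//
+//   SDNode *NewN = DAG.UpdateNodeOperands(N, NewOp);
+//   if (NewN != N)  // N collided with an existing node.
+//     DAG.ReplaceAllUsesWith(N, NewN);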
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
+}
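+
+// Illustrative sketch (assumed target opcode; not part of this patch): a
+// target's Select() hook typically uses these wrappers to morph a generic
+// node into a machine node in place, e.g.:
+//
+//   // N has value type i32; X86::ADD32rr is an example machine opcode.
+//   return CurDAG->SelectNodeTo(N, X86::ADD32rr, MVT::i32,
+//                               N->getOperand(0), N->getOperand(1));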
+
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This will make the debugger work better at -O0, where there is a
+/// higher probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!NLoc.isUnknown() && OptLevel == CodeGenOpt::None && OLoc != NLoc) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->OperandsNeedDelete = true;
+ } else
+ N->InitOperands(N->OperandList, Ops, NumOps);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ ArrayRef<EVT> ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = 0;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
+ }
+
+ // Allocate a new MachineSDNode.
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyMachineNode(N);
+#endif
+ return N;
+}
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
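+
+// Illustrative sketch (target-specific subregister index assumed): these
+// helpers wrap the common "take a piece of a wider register" idiom, e.g.
+// extracting the low 32 bits of a 64-bit value on x86-64:
+//
+//   SDValue Lo = DAG.getTargetExtractSubreg(X86::sub_32bit, DL,
+//                                           MVT::i32, Val64);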
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// getDbgValue - Creates a SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG &d,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of a value with itself");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
+}
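+
+// Illustrative sketch (not part of this patch): a DAG-combine style caller
+// folds a node to a simpler value and redirects every user to it; the
+// folded value here is an assumption:
+//
+//   SDValue Folded = DAG.getConstant(0, VT);
+//   DAG.ReplaceAllUsesWith(SDValue(N, 0), Folded);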
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial, case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To);
+
+ // Collect all the uses and make a record of each. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the total number of nodes,
+/// which is one greater than the maximum assigned id.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ checkForCycles(N);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands, add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ checkForCycles(N);
+ // N is in sorted position, so each of its users now has one fewer
+ // operand that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (I == SortedPos) {
+#ifndef NDEBUG
+ SDNode *S = ++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
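+
+// Illustrative sketch (not part of this patch): after the call, walking the
+// node list visits every operand before its users, so a forward pass over
+// the DAG can be a single linear scan ('visit' is a hypothetical action):
+//
+//   DAG.AssignTopologicalOrder();
+//   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+//        E = DAG.allnodes_end(); I != E; ++I) {
+//     SDNode *N = I;  // operands of N have already been visited
+//     visit(N);
+//   }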
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+ assert(SD && "Trying to assign an order to a null node!");
+ Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+ assert(SD && "Trying to get the order of a null node!");
+ return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
+ if (SD)
+ SD->setHasDebugValue(true);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
+ EVT VT, int64_t o, unsigned char TF)
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there is any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply; we just want to be able to see through
+ // TokenFactors, etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
+ }
+
+ // Loads don't have side effects; look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
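+
+// Illustrative sketch (not part of this patch): a combine that wants to move
+// a value past a load's chain might guard the transform with (names assumed):
+//
+//   if (Chain.reachesChainWithoutSideEffects(SDValue(LoadNode, 1)))
+//     ... // no intervening stores or other side effects were found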
+
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ if (Op == N)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, TLI.getPointerTy()));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getDataLayout());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
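+
+// For example, for a pointer FI#0+4 where the stack object at FI#0 is
+// 16-byte aligned, the inferred alignment is MinAlign(16, 4) == 4, since the
+// constant offset may break the object's larger alignment.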
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
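+
+// For example, a <4 x i8> build_vector of 0x55 repeated gives the 32-bit
+// splat value 0x55555555; both halving steps find matching halves, so the
+// loop terminates with SplatBitSize == 8.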
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
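+
+// For example, the mask <2, -1, 2, 2> is a splat of element 2 (the -1 entry
+// is undef and matches anything), while <2, 3, 2, 2> is not a splat.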
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
+ dbgs() << "Offending node:\n";
+ N->dumprFull();
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
+ }
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+ checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000..ce40cd6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6873 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT, then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, EVT ValueVT,
+ const Value *V,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT, V);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL,
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ llvm_unreachable("Unknown mismatch!");
+}
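+
+// For example, assembling an i64 from two i32 parts yields a single
+// BUILD_PAIR, while three i16 parts become a round i32 pair plus an odd i16
+// that is extended, shifted into position, and OR'd in.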
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the vector value they represent, following the
+/// target's vector type breakdown for ValueVT.
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getSimpleValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT, V);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartEVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartEVT) {
+ bool Smaller = ValueVT.bitsLE(PartEVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
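+
+// For example, if the part is <4 x float> but ValueVT is <2 x float>, the
+// part came from vector widening and the value is recovered with an
+// EXTRACT_SUBVECTOR from element 0.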
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartEVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover fewer bits than the value has, truncate the value.
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ if (PartEVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
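+
+// For example, copying an i48 into three i16 parts first splits off the odd
+// tail with (srl Val, 32), then repeatedly bisects the remaining i32 into
+// two i16 halves with EXTRACT_ELEMENT.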
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ bool Smaller = PartEVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ IntermediateVT, Val,
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, split the value into
+ // legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+ }
+}
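+
+// For example, on a target whose widest legal vector is <4 x i32>, an
+// <8 x i32> value breaks down into two <4 x i32> intermediates taken with
+// EXTRACT_SUBVECTOR at element offsets 0 and 4, each then copied to parts.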
+
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; however, with physical registers it is
+ /// necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ MVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
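+
+// For example, an IR i64 on a 32-bit target is typically represented with
+// ValueVTs = {i64}, RegVTs = {i32}, and two consecutive entries in Regs;
+// getCopyFromRegs then reassembles the i64 from the two i32 registers.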
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
+
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT, V);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+ ISD::NodeType ExtendKind =
+ TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 and Flag is used, then the use of the last CopyToReg is
+ // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() &&
+ TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
+ AA = &aa;
+ GFI = gfi;
+ LibInfo = li;
+ TD = DAG.getTarget().getDataLayout();
+ Context = DAG.getContext();
+ LPadToCallSiteMap.clear();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurDebugLoc = DebugLoc();
+ HasTailCall = false;
+}
+
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
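+
+// For example, after two independent loads, getRoot produces
+// (TokenFactor ld1.chain, ld2.chain), ordering a subsequent store after both
+// loads without serializing the loads against each other.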
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+ if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+ DAG.AssignOrdering(Node, SDNodeOrder);
+
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+ AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
+#include "llvm/IR/Instruction.def"
+ }
+
+ // Assign the ordering to the freshly created DAG nodes.
+ if (NodeMap.count(&I)) {
+ ++SDNodeOrder;
+ AssignOrderingToNode(getValue(&I).getNode());
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
+ resolveDanglingDebugInfo(V, N);
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return DAG.getConstant(0, TLI.getPointerTy());
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+
+ return DAG.getMergeValues(&Constants[0], NumElts,
+ getCurDebugLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CV->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
+ Chains[i] =
+ DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
+
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true, 0, 0));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, OutVals, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight /* = 0 */) {
+ if (!Weight)
+ Weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, Weight);
+}
+
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ llvm_unreachable("Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively emit the subexpressions as a sequence of conditional branches;
+/// otherwise emit Cond as a single branch via EmitBranchForMergedCondition.
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = BrMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0], BrMBB);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+    assert(CB.CC == ISD::SETCC_INVALID &&
+           "Condition is undefined for range-membership checks.");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
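+    // Note: Low <= X <= High is equivalent to the single unsigned comparison
+    // (X - Low) <=u (High - Low), and when Low is zero the subtraction can be
+    // skipped entirely.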
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETULE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code needed to compute the
+/// index into the jump table from the switch case value.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // may therefore require extension or truncation.
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()), Sub,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits the code needed to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+        // Switch case ranges are encoded into a series of masks. Just use
+        // the pointer type; it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
+
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, Sub);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB != NextBlock)
+    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
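+    // e.g. for Mask == 0b01000 the only set bit is bit 3, so this reduces
+    // to the comparison ShiftOp == 3.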
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
+ ISD::SETNE);
+ }
+
+ // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+ addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+ // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+ addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (NextMBB != NextBlock)
+ BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else if (Fn && Fn->isIntrinsic()) {
+ assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ } else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to landingpad not in landing pad!");
+
+ MachineBasicBlock *MBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ AddLandingPadInfo(LP, MMI, MBB);
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = DAG.getRoot();
+ SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+ SDValue Chain = Op1.getValue(1);
+
+ // Insert the EHSELECTION instruction.
+ VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ Ops[0] = Op1;
+ Ops[1] = Chain;
+ SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+ Chain = Op2.getValue(1);
+ Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+ Ops[0] = Op1;
+ Ops[1] = Op2;
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Ops[0], 2);
+
+ std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+ setValue(&LP, RetPair.first);
+ DAG.setRoot(RetPair.second);
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  // If any two of the cases have the same destination, and one case value is
+  // the same as the other except for a single bit that is set in one and
+  // unset in the other, use bit manipulation to do two compares at once.
+  // For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
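+        // For the example above, X == 6 || X == 4: SmallValue = 0b100,
+        // BigValue = 0b110, CommonBit = 0b010, and we emit
+        // (X | 0b010) == 0b110.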
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchBB, Small.BB,
+ Small.ExtraWeight + Big.ExtraWeight);
+ addSuccessorWithWeight(SwitchBB, Default,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
+
+ // Order cases by weight so the most likely case will be checked first.
+ uint32_t UnhandledWeights = 0;
+ if (BPI) {
+ for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
+ uint32_t IWeight = I->ExtraWeight;
+ UnhandledWeights += IWeight;
+ for (CaseItr J = CR.Range.first; J < I; ++J) {
+ uint32_t JWeight = J->ExtraWeight;
+ if (IWeight > JWeight)
+ std::swap(*I, *J);
+ }
+ }
+ }
+ // Rearrange the case blocks so that the last one falls through if possible.
+ Case &BackCase = *(CR.Range.second-1);
+ if (Size > 1 &&
+ NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ // We start at the bottom as it's the case with the least weight.
+ for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETCC_INVALID;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+
+    // The false weight is the sum of the weights of all unhandled cases.
+ UnhandledWeights -= I->ExtraWeight;
+ CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+ /* me */ CurBlock,
+ /* trueweight */ I->ExtraWeight,
+ /* falseweight */ UnhandledWeights);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return TLI.supportJumpTables() &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
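+  // e.g. with 8-bit First = 0 and Last = 255, extending to 9 bits ensures
+  // that Last - First + 1 == 256 cannot wrap.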
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ // The density is TSize / Range. Require at least 40%.
+ // It should not be possible for IntTSize to saturate for sane code, but make
+ // sure we handle Range saturation correctly.
+ uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+ uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
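+  // e.g. 5 cases spanning [0, 9]: IntTSize = 5, IntRange = 10, and
+  // 5 * 10 >= 10 * 4, so the 40% density requirement is met.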
+ if (IntTSize * 10 < IntRange * 4)
+ return false;
+
+ DEBUG(dbgs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range << ". Size: " << TSize << ".\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.ule(TEI) && TEI.ule(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Calculate weight for each unique destination in CR.
+ DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+ if (FuncInfo.BPI)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(I->BB);
+ if (Itr != DestWeights.end())
+ Itr->second += I->ExtraWeight;
+ else
+ DestWeights[I->BB] = I->ExtraWeight;
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(*I);
+ addSuccessorWithWeight(JumpTableBB, *I,
+ Itr != DestWeights.end() ? Itr->second : 0);
+ }
+ }
+
+ // Create a jump table index for this jump table.
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+ ->createJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+ return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the summed density of the LHS and
+  // RHS. This will (heuristically) allow us to emit JumpTables later.
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
+ DEBUG(dbgs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. that use 80-bit X87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
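+    // Both denser halves and a wider gap between LEnd and RBegin increase
+    // the metric, so the chosen pivot tends to leave each half dense enough
+    // to be emitted as a jump table later.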
+ // Should always split in some non-trivial place
+ DEBUG(dbgs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ const Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to RHS.
+ CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and the range spans less than the machine word bitwidth, encode the case
+/// range into a series of masks and emit bit tests against those masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+  // If the target does not have a legal shift left, do not emit bit tests
+  // at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+    // A single case counts as one compare; a case range counts as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+ return false;
+ }
+ DEBUG(dbgs() << "Total number of unique destinations: "
+ << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(IntPtrBits) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(dbgs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a word without having
+  // to subtract minValue: in that case we can omit the subtraction entirely.
+ if (maxValue.ult(IntPtrBits)) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
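+  // e.g. for cases {1, 4, 6} with IntPtrBits == 64: maxValue = 6 < 64, so
+  // lowBound stays 0 and no subtraction is emitted.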
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+ CasesBits[i].ExtraWeight += I->ExtraWeight;
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+  }
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(dbgs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB, CasesBits[i].ExtraWeight));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+
+  /// Use a shorter form of declaration, and also show that we want to use
+  /// IntegersSubsetMapping as the Clusterifier.
+ typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+
+ Clusterifier TheClusterifier;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ // Start with "simple" cases
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+ TheClusterifier.add(i.getCaseValueEx(), SMBB,
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
+ }
+
+ TheClusterifier.optimize();
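+  // e.g. cases 1, 2 and 3 that all branch to the same successor are merged
+  // by optimize() into the single range [1, 3].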
+
+ size_t numCmps = 0;
+ for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+ e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+ Clusterifier::Cluster &C = *i;
+ // Update edge weight for the cluster.
+ unsigned W = C.first.Weight;
+
+    // FIXME: Currently works with ConstantInt-based numbers.
+    // Changing it to be APInt-based is too heavy a change for this commit.
+ Cases.push_back(Case(C.first.getLow().toConstantInt(),
+ C.first.getHigh().toConstantInt(), C.second, W));
+
+ if (C.first.getLow() != C.first.getHigh())
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (!SI.getNumCases()) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ SwitchMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ (void)numCmps;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ const Value *SV = SI.getCondition();
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+    // If the range has few cases (three or fewer) emit a series of specific
+    // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the switch has more than N blocks, and is at least 40% dense, and the
+ // target supports indirect branches, then emit a jump table rather than
+ // lowering the switch to a binary tree of conditional branches.
+    // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries().
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+    // Emit a binary tree. We need to pick a pivot, and push the left and
+    // right ranges onto the worklist. Leaves are handled via the
+    // handleSmallSwitchRange() call.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB);
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
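+  // For illustration: since exactness guarantees a zero remainder, an exact
+  // sdiv by a constant can be lowered as a shift (for the power-of-two
+  // factor) followed by a multiply by the odd factor's multiplicative
+  // inverse modulo 2^N.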
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Cond,
+ SDValue(TrueVal.getNode(),
+ TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(),
+ FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I){
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that the source and destination are the same size, so
+  // this is either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ DestVT, N)); // convert types.
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning at position Pos and ending at Pos+Size, falls within the
+// specified sequential range [Low, Low+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
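+  // e.g. Mask = <4, 5, undef, 7> with Pos = 0, Size = 4, Low = 4 returns
+  // true: the undef element is skipped and the rest count up from 4.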
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
+ return false;
+ return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // Mask is longer than the source vectors and is a multiple of the source
+ // vectors. We can use concatenate vector to make the mask and vectors
+ // lengths match.
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
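+    // e.g. with SrcNumElts = 2 and MaskNumElts = 4, an old index 2 (Src2[0])
+    // becomes 2 - (2 - 4) = 4, since Src2's elements now start at position 4
+    // of the widened inputs.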
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses in both vectors.
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ unsigned Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+    // Check if the access is smaller than the vector size, and whether we
+    // can find a reasonable extract index.
+    int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Cannot
+                                  // extract.
+    int StartIdx[2];  // StartIdx to extract from.
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ continue;
+ }
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+  // We can't use either concat vectors or extract subvectors, so fall back to
+  // replacing the shuffle with per-element extracts and a build vector.
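+  // Each mask element becomes an EXTRACT_VECTOR_ELT from the appropriate
+  // source (or undef for a negative mask element), and the results feed a
+  // single BUILD_VECTOR node.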
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
+ }
+
+ Ops.push_back(Res);
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
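+  // E.g. for an aggregate of type {i32, {float, double}}, indices (1, 1)
+  // give LinearIndex == 2: the double is the third scalar value in the
+  // flattened aggregate.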
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
+
+ for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getConstant(Offset, N.getValueType()));
+ }
+
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+        uint64_t Offs =
+          TD->getTypeAllocSize(Ty) * CI->getSExtValue();
+ SDValue OffsVal;
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
+ if (PtrBits < 64)
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
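+      // E.g. indexing into an array of i64 (ElementSize == 8) becomes
+      // IdxN << 3.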
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy();
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
+
+  // Handle alignment. If the requested alignment is less than or equal to
+  // the stack alignment, ignore it. If the requested alignment is greater
+  // than the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding StackAlign-1 to the size.
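+  // E.g. with StackAlign == 16, a 13-byte allocation becomes
+  // (13 + 15) & ~15 == 16 bytes.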
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile() || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
+
+ Values[i] = L;
+ Chains[ChainI] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+  // Get the lowered operands. Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have entries in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
+}
+
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+ SynchronizationScope Scope,
+ bool Before, DebugLoc dl,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // Fence, if necessary
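+  // E.g. an AcquireRelease operation gets a Release fence before it and an
+  // Acquire fence after it.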
+ if (Before) {
+ if (Order == AcquireRelease || Order == SequentiallyConsistent)
+ Order = Release;
+ else if (Order == Acquire || Order == Monotonic)
+ return Chain;
+ } else {
+ if (Order == AcquireRelease)
+ Order = Acquire;
+ else if (Order == Release || Order == Monotonic)
+ return Chain;
+ }
+ SDValue Ops[3];
+ Ops[0] = Chain;
+ Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+ return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()),
+ MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation");
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ }
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl,
+ getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValOperand()),
+ I.getPointerOperand(), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = TLI.getValueType(I.getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValueOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+  // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+ }
+
+ setValue(&I, Result);
+ } else {
+    // Assign order to result here. If the intrinsic does not produce a result,
+    // it won't be mapped to an SDNode and visit() will not assign it an order
+    // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the integer bit representation of the floating-point value.
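+/// For example, the bits of 3.0f (0x40400000) become 0x3fc00000, i.e. 1.5f.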
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the integer bit representation of the floating-point value.
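+/// For example, the bits of 3.0f (0x40400000) yield (128 - 127) == 1, giving
+/// 1.0f.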
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+ MVT::f32);
+}
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFracPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+}
+
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue LogOfMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+}
+
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ // Different possible minimax approximations of significand in
+ // floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+}
+
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue Log10ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+}
+
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool IsExp10 = false;
+  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
+ }
+ }
+
+ if (IsExp10) {
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+    SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                              TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+}
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree,
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // optimizing for size, we only want to do this if the expansion would produce
+ // a small number of multiplies, otherwise we do the full expansion.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, LHS.getValueType());
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ // If optimizing for size, don't insert too many multiplies. This
+ // inserts up to 5 multiplies.
+ CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
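+      // E.g. Val == 13 (0b1101) computes x^13 as x * x^4 * x^8 via repeated
+      // squaring.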
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
+ const SDValue &Ext = N.getOperand(0);
+  if (Ext.getOpcode() == ISD::AssertZext ||
+      Ext.getOpcode() == ISD::AssertSext) {
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, these are inserted into the entry
+/// block.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
+ const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ unsigned Reg = 0;
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI->getFrameRegister(MF);
+
+ if (!Reg && N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
+ }
+
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::setjmp:
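+    // Return "_setjmp" or, skipping the leading underscore, "setjmp",
+    // depending on whether the target uses the underscore-prefixed name.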
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+ case Intrinsic::longjmp:
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+ case Intrinsic::memcpy: {
+    // Assert for address space < 256 since we only support user-defined
+    // address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::memset: {
+    // Assert for address space < 256 since we only support user-defined
+    // address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+    // Assert for address space < 256 since we only support user-defined
+    // address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ MDNode *Variable = DI.getVariable();
+ const Value *Address = DI.getAddress();
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return 0;
+ }
+
+    // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built. The absolute
+    // SDNodeOrder values (though not their relative ordering) differ depending
+    // on whether debug info exists.
+ ++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return 0;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
+ return 0;
+ }
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
+ return 0;
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        // If the variable is pinned by an alloca in a dominating block, use
+        // the StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return 0;
+
+    // Assign this debug info node an SDNodeOrder. Debug info input nodes get
+    // an SDNodeOrder but do not always have a corresponding SDNode built, so
+    // the absolute (but not the relative) SDNodeOrder values differ depending
+    // on whether debug info exists.
+ ++SDNodeOrder;
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+      } else if (!V->use_empty()) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ return 0;
+ }
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI->setFunctionContextIndex(FI);
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Ops, 2);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+
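+  // When the shift amount is not a constant, the MMX shift-by-immediate
+  // intrinsics below are rewritten to their shift-by-register forms (e.g.
+  // x86_mmx_pslli_w becomes x86_mmx_psll_w); a constant amount is left to
+  // the normal target-intrinsic path.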
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+    // The vector shift intrinsics with scalar shift amounts use 32-bit
+    // values, but the SSE2/MMX shift instructions read 64 bits, so the upper
+    // 32 bits must be set to zero.
+    // We must do this early because v2i32 is not a legal type.
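+    // E.g. (illustrative) a scalar shift amount of 5 becomes the v2i32
+    // vector <5, 0>, which is then bitcast to the 64-bit operand type the
+    // shift-by-register instructions expect.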
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
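+  // For the 256-bit lane insert/extract intrinsics below, bit 0 of the
+  // immediate selects the low or high 128-bit lane, so the subvector index
+  // is either 0 or the 128-bit element count (e.g. 0 or 4 for v4f32).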
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::x86_avx_vextractf128_pd_256:
+ case Intrinsic::x86_avx_vextractf128_ps_256:
+ case Intrinsic::x86_avx_vextractf128_si_256:
+ case Intrinsic::x86_avx2_vextracti128: {
+ EVT DestVT = TLI.getValueType(I.getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+ DestVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(I.getType());
+ const Value *Op1 = I.getArgOperand(0);
+ Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ Code);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return 0;
+ case Intrinsic::log:
+ setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::log2:
+ setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::log10:
+ setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::exp:
+ setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::exp2:
+ setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return 0;
+ case Intrinsic::pow:
+ setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
+ return 0;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
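+  // fmuladd is fused into a single FMA node when FP operation fusion is not
+  // 'strict' and the target reports FMA as faster than a separate
+  // multiply+add; otherwise it lowers to the unfused FMUL/FADD pair below.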
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+        TLI.isFMAFasterThanMulAndAdd(VT)) {
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return 0;
+ }
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+ MVT::i16, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+ MVT::f32, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
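+  // For cttz/ctlz the second operand is the i1 'is_zero_undef' flag: a false
+  // flag selects the node form that is fully defined for a zero input.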
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(getRoot(), dl, Src, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getCalledValue());
+ EVT Ty = Arg.getValueType();
+
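+    // The second argument selects min/max semantics: an unresolvable 'max'
+    // query (flag == 0) folds to -1, an unresolvable 'min' query folds to 0.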
+ if (CI->isZero())
+ Res = DAG.getConstant(-1ULL, Ty);
+ else
+ Res = DAG.getConstant(0, Ty);
+
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
+
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+ TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return 0;
+ }
+
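+  // trap/debugtrap normally lower to ISD::TRAP/ISD::DEBUGTRAP; if a trap
+  // function has been configured (e.g. via -trap-func), they are emitted as
+  // a call to that symbol instead.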
+ case Intrinsic::debugtrap:
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, dl, MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, dl);
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return 0;
+ }
+
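+  // Each *.with.overflow intrinsic returns a {result, i1 overflow} pair,
+  // which maps directly onto the two-result ISD::*O nodes built below.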
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2));
+ return 0;
+ }
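+  // The prefetch operands are (address, rw, locality, cache type); only the
+  // rw flag is inspected here, marking the node as a read or a write.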
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 5,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
+ return 0;
+ }
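+  // Lifetime markers only matter to stack coloring, so each marker is
+  // attached to the frame index of the static alloca underlying its pointer
+  // argument; objects that are not allocas are skipped.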
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+    // Stack coloring is not enabled at -O0; discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return 0;
+
+ SmallVector<Value *, 4> Allocas;
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+
+ for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(),
+ E = Allocas.end(); Object != E; ++Object) {
+ AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+ unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+ Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+ DAG.setRoot(Res);
+ }
+ return 0;
+ }
+ case Intrinsic::invariant_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ // Discard region information.
+ return 0;
+ case Intrinsic::donothing:
+ // ignore
+ return 0;
+ }
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MCSymbol *BeginLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+
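+  // If the return value cannot be lowered in registers, demote it: pass a
+  // hidden sret pointer to a fresh stack slot as the first argument, and
+  // reload the value from that slot after the call completes.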
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().CreateTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, TLI))
+ isTailCall = false;
+
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
+ getCurDebugLoc(), CS);
+ std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode()) {
+ setValue(CS.getInstruction(), Result.first);
+ } else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);
+
+ unsigned NumValues = RetTys.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+ DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ setValue(CS.getInstruction(),
+ DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &Values[0], Values.size()));
+ }
+
+ // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to an SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
+ HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
+
+ if (LandingPad) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that
+/// the value is equal or not equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ Builder.TD))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ false /*volatile*/,
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+      if (!TLI.isTypeLegal(LoadVT) ||
+          !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ComputeUsesVAFloatArgument(I, &MMI);
+
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
+ return;
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
+ return;
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
+ return;
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
+ return;
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
+ return;
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+ return;
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
+ return;
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
+ return;
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
+ return;
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
+ return;
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
+ return;
+ break;
+ case LibFunc::memcmp:
+ if (visitMemCmpCall(I))
+ return;
+ break;
+ }
+ }
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getCalledValue());
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed checking
+  // is done within LowerCallTo, once more information about the call is
+  // known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
+ const DataLayout *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end anonymous namespace
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+///
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo) {
+ LLVMContext &Context = *DAG.getContext();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert
+    // a bit cast of the input value. More generally, handle any case where
+    // the input value disagrees with the register class we plan to stick it
+    // in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ MVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is a FP value and we want it in FP registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ MVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getSimpleValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in inline asm.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD).
+ getSimpleVT();
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
+ }
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+ }
+
+  // Third pass - Loop over all of the operands, assigning virtual or
+  // physregs to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ // Set the asm dialect.
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an 'other' constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set
+    // MayLoad/MayStore for 'other' constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ ExtraInfo |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ ExtraInfo |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
+ false,
+ 0,
+ DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition of this operand that we already
+        // emitted. When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the newly-created virtual registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "Don't know how to handle indirect register inputs yet "
+ "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/number of elements. This can happen for register classes
+ // that can contain multiple different value types: the physical or
+ // virtual register allocated may not have the same VT as was expected.
+ // Convert it to the right type with a bitcast.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
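+ // Indirect outputs (e.g. "=*m"-style constraints) are not returned in
+ // registers; their values are copied out of the assigned physregs and then
+ // stored through the corresponding pointer operands below.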
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag, IA);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second),
+ false, false, 0);
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
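+// The four visitors below lower the va_start/va_arg/va_end/va_copy IR
+// intrinsics to their VASTART/VAARG/VAEND/VACOPY SelectionDAG counterparts.
+// Each node is threaded through DAG.setRoot so it stays ordered with respect
+// to other side-effecting nodes; visitVAArg additionally records the loaded
+// value (result 0) and uses the output chain (result 1) as the new root.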
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const DataLayout &TD = *TLI.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are migrated to using LowerCall, this hook
+/// should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle all of the outgoing arguments.
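+ // Each IR-level argument may expand to several values (for aggregates), and
+ // each value to several register-sized parts once legalized; the loop below
+ // builds one ISD::OutputArg per part, propagating the zext/sext/byval/nest
+ // attributes to every part.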
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ ArgListTy &Args = CLI.Args;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getDataLayout()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
+ // For ByVal, the alignment should come from the front end; the back end
+ // will guess if this info is not there, but there are cases it cannot
+ // get right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+ // If this isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
+ }
+ }
+
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ CLI.Chain = LowerCall(CLI, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!CLI.IsTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
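+ // When the callee is known to sign- or zero-extend its result, wrap the raw
+ // register copies in AssertSext/AssertZext nodes so that later DAG combines
+ // can remove redundant extensions.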
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, NULL,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case; nothing
+ // will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, CLI.Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Arguments used by a switch are not considered
+/// entry-only, since the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const DataLayout *TD = TLI.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: We assume that a pointer will never break down into more than
+ // one VT or more than one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+ Ins.push_back(RetArg);
+ }
+
+ // Set up the incoming argument description vector.
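+ // Note: attribute indices are 1-based; index 0 refers to the return value,
+ // so the first IR parameter is queried at index 1.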
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ PointerType *Ty = cast<PointerType>(I->getType());
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+ // For ByVal, the alignment should be passed from the front end; the back
+ // end will guess if this info is not there, but there are cases it cannot
+ // get right.
+ unsigned FrameAlign;
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+ Idx-1, i*RegisterVT.getStoreSize());
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+ // If this isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+ RegVT, VT, NULL, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+ SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused then remember its value. It is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+ NumParts, PartVT, VT,
+ NULL, AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+
+ SDB->setValue(I, Res);
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ }
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(I);
+ SDB->CopyToExportRegsIfNeeded(I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
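+/// For example, given
+///   %p = phi i32 [ 7, %entry ], [ %x, %loop ]
+/// the constant 7 must be materialized into a virtual register at the end of
+/// %entry, because machine PHI operands must be register/block pairs.
+///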
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+ // Remember that this register needs to be added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000..9188945
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,559 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class DataLayout;
+class TargetLibraryInfo;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// UnusedArgNodeMap - Maps argument values for unused arguments. This is
+ /// used to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and then emit a single TokenFactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ const Constant *Low;
+ const Constant *High;
+ MachineBasicBlock* BB;
+ uint32_t ExtraWeight;
+
+ Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
+ uint32_t extraweight) : Low(low), High(high), BB(bb),
+ ExtraWeight(extraweight) { }
+
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+ uint32_t ExtraWeight;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
+ uint32_t Weight):
+ Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ const Constant *LT;
+ const Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ struct CaseBitsCmp {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
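+ /// Clusterify - Build a sorted list of disjoint [Low, High] case ranges in
+ /// Cases, merging adjacent cases that target the same block; returns the
+ /// number of comparisons needed to lower the clustered cases.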
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me,
+ uint32_t trueweight = 0, uint32_t falseweight = 0)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+ TrueWeight(trueweight), FalseWeight(falseweight) { }
+
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // By default we emit LHS op RHS. MHS is used for range comparisons:
+ // if MHS is not null, we emit (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+
+ // TrueWeight/FalseWeight - branch weights.
+ uint32_t TrueWeight, FalseWeight;
+ };
+
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ uint32_t Weight):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ uint32_t ExtraWeight;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value* SV,
+ unsigned Rg, MVT RgVT, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ MVT RegVT;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const DataLayout *TD;
+ AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from the clear so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ /// AssignOrderingToNode - Assign an ordering to the node. The ordering is
+ /// taken from the order in which the code appeared in the source. It is
+ /// used by the scheduler to effectively turn off scheduling.
+ void AssignOrderingToNode(const SDNode *Node);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight = 0);
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is a function argument then create the
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, they will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 0000000..3b5823b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,645 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FSINCOS: return "fsincos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (const void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (const void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
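+
+// Editor's note: an illustrative aside, not part of the original patch.
+// Given print_types/print/print_details above, a dumped node line looks
+// roughly like this (address, operands, and details are hypothetical):
+//   0x2c197b0: i32,ch = load 0x2c19540, 0x2c196a8 <LD4[%p]> [ID=7]
+// i.e. "<node address>: <result types> = <opcode name> <operand addresses>
+// <node-specific details>".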
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..eeea9e4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,3019 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction"));
+static cl::opt<bool>
+EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower a formal argument"));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
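+
+// Editor's note: an illustrative aside, not part of the original patch.
+// In a +Asserts build, the hidden flags above let llc pop up a Graphviz
+// view of the DAG at each phase of CodeGenAndEmitDAG(), e.g. (input file
+// hypothetical):
+//   llc -view-dag-combine1-dags -view-isel-dags -view-sched-dags foo.ll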
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
+
+ if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
+ TLI.getSchedulingPreference() == Sched::Source)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+}
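+
+// Editor's note: an illustrative sketch, not part of the original patch.
+// A target steers createDefaultScheduler() through its scheduling
+// preference; 'MyTargetLowering' is hypothetical, setSchedulingPreference()
+// is the existing TargetLowering hook:
+//   MyTargetLowering::MyTargetLowering(MyTargetMachine &TM)
+//       : TargetLowering(TM) {
+//     setSchedulingPreference(Sched::Hybrid); // -> createHybridListDAGScheduler
+//   }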
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+}
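+
+// Editor's note: an illustrative sketch, not part of the original patch.
+// A target that marks an instruction with 'usesCustomInserter' overrides
+// the hook along these lines ('MyTargetLowering' and MyTarget::SELECT_CC
+// are hypothetical):
+//   MachineBasicBlock *
+//   MyTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+//                                                 MachineBasicBlock *MBB) const {
+//     switch (MI->getOpcode()) {
+//     case MyTarget::SELECT_CC:
+//       return emitSelectCC(MI, MBB); // may add new blocks and control flow
+//     default:
+//       llvm_unreachable("unexpected custom-inserter instruction");
+//     }
+//   }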
+
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ assert(!MI->hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
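+
+// Editor's note: an illustrative aside, not part of the original patch.
+// Targets opt in per instruction with 'let hasPostISelHook = 1' in their
+// TableGen definitions and then override AdjustInstrPostInstrSelection to
+// touch up the emitted MachineInstr (e.g. to add implicit operands).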
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges carrying a PHI input
+/// value that may trap. In that case we have to split the edge so that the
+/// path through the predecessor block that doesn't reach the PHI block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping values that can occur as arguments to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
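+
+// Editor's note: an illustrative sketch, not part of the original patch.
+// The shape of input this guards against, in LLVM IR (names hypothetical):
+//   bb1:                          ; two successors -> bb1->merge is critical
+//     br i1 %c, label %merge, label %other
+//   merge:                        ; multiple predecessors
+//     %p = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @g to i32)), %bb1 ],
+//                  [ 0, %bb2 ]
+// The udiv constant expression can trap (its divisor is not a known non-zero
+// constant); splitting the edge keeps its evaluation off the bb1->other path.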
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ const Function &Fn = *mf.getFunction();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+ TargetSubtargetInfo &ST =
+ const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
+ ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
+
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+ CurDAG->init(*MF, TTI);
+ FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
+ SDB->init(GFI, *AA, LibInfo);
+
+ MF->setHasMSInlineAsm(false);
+ SelectAllBasicBlocks(Fn);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ MachineBasicBlock *EntryMBB = MF->begin();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+
+ if (MFI->hasCalls() && MF->hasMSInlineAsm())
+ break;
+
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
+ II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ }
+ if (II->isMSInlineAsm()) {
+ MF->setHasMSInlineAsm(true);
+ }
+ }
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
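+
+ // Editor's note (illustrative, names hypothetical): with
+ // RegFixups = { vreg1 -> vreg2, vreg2 -> vreg7 }, the inner loop resolves
+ // vreg1's ultimate replacement to vreg7 before calling replaceRegWith, so
+ // no use of an intermediate register survives.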
+
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ return true;
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block. Terminators
+ // are handled below.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ } while (!Worklist.empty());
+}
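+
+// Editor's note: an illustrative aside, not part of the original patch.
+// Example of what gets recorded: for a CopyToReg whose source is
+// (and x, 255), ComputeMaskedBits reports the top 24 bits of a 32-bit value
+// as known zero, so a later block reading that vreg can skip a redundant
+// zero-extension.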
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ int BlockNumber = -1;
+ (void)BlockNumber;
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
+ BlockName = MF->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+namespace {
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+} // end anonymous namespace
+
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
+ // The AllNodes list is now topological-sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and proceeding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
+ ReplaceUses(Node, ResNode);
+ }
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) // Don't delete EntryToken, etc.
+ CurDAG->RemoveDeadNode(Node);
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+
+ // Assign the call site to the landing pad's begin label.
+ MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionPointerRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+}
+
+/// TryToFoldFastISelLoad - Check whether we can fold the specified load into
+/// the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
+ FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result; in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
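+
+// Editor's note: an illustrative sketch, not part of the original patch.
+// The A/B/C scenario from the comment above, in LLVM IR of this era
+// (names hypothetical):
+//   %x = load i32* %P              ; A: LI, has a single use
+//   %y = icmp eq i32 %x, 42        ; B: folded between A and C
+//   br i1 %y, label %t, label %f   ; C: FoldInst
+// The walk follows the single-use chain A -> B -> C; if the vreg holding %x
+// has exactly one machine use, the target is asked to fold the load.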
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+#ifndef NDEBUG
+// Collect per-instruction statistics for fast-isel misses. Only the
+// instruction that causes the bail-out is accounted for; instructions higher
+// in the block are not. Thus, summing the per-instruction stats will not add
+// up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (TM.Options.EnableFastISel)
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
+
+ // Iterate over all basic blocks in the function.
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(PN);
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(PN);
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Setup an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (!FastIS->LowerArguments()) {
+ // Fast isel failed to lower these arguments
+ if (EnableFastISelAbortArgs)
+ llvm_unreachable("FastISel didn't lower all arguments");
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
+ }
+
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
+ continue;
+ }
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
+ SelectBasicBlock(Inst, BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
+ if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+ --BI;
+ break;
+ }
+
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
+ continue;
+ }
+
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ NumFastIselFailures += NumFastIselRemaining;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
+ NumFastIselFailures += NumFastIselRemaining;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ } else {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(Fn);
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ }
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DEBUG(dbgs() << "Total number of PHI nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know which MBB is the last one the LLVM BB expanded
+ // into, update PHI nodes in successors.
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ uint32_t UnhandledWeight = 0;
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
+ UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
+
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ if (j+1 != ej)
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ UnhandledWeight,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+ else
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
+ UnhandledWeight,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is the "default" BB. We have two jumps to it: from the "header" BB
+ // and from the last "case" BB.
+ if (PHIBB == SDB->BitTestCases[i].Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Parent)
+ .addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB);
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->JTCases[i].first.HeaderBB);
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SwitchCases.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
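+  // Nothing has been registered as the default yet, so fall back to the
+  // scheduler selected on the command line (which itself defaults to the
+  // target-preferred scheduler) and cache it as the registry default.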
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
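+///
+/// For example, if the combiner shrank (and X, 255) to (and X, 15) because the
+/// upper bits of X are known to be zero, the MaskedValueIsZero check below
+/// still allows the 255 pattern to match.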
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows bits that the desired mask does not, this
+  // doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
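+///
+/// The logic mirrors CheckAndMask, except that missing mask bits are
+/// acceptable when the corresponding input bits are already known to be one.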
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask sets bits that the desired mask does not allow,
+  // this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
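+  // The remaining operands come in groups: a flag word describing the operand,
+  // followed by that operand's values.  The register count encoded in the flag
+  // word tells us how many values each group carries.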
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+  // Nodes are assigned unique IDs, and a node's ID is guaranteed to be
+  // greater than all of its (recursive) operands.  If we scan to a point where
+ // 'use' is smaller than the node we're scanning for, then we know we will
+ // never find it.
+ //
+  // The Use's node ID may be -1 (unassigned) if it is a newly allocated node.
+  // This can happen because we scan down to newly selected nodes in the case
+  // of glue uses.
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+ // Don't revisit nodes if we already scanned it and didn't fail, we know we
+ // won't fail if we scan it again.
+ if (!Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+  // If a use of Root can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle.  For example, in the following
+  // diagram, Root can reach N through X.  If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ EVT VTs[] = { MVT::Other, MVT::Glue };
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+/// GetVBR - decode a vbr encoding whose top bit is set.
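+/// The encoding stores seven bits per byte, least significant group first,
+/// with the high bit of each byte set when another byte follows.  For example,
+/// the byte pair 0x83 0x02 decodes to (0x83 & 127) | (0x02 << 7) = 259.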
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
+void SelectionDAGISel::
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputGlue);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (FRN->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
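+// Classification of the users found below a chained node during matching:
+// CR_Simple means only already-selected nodes were reached, CR_InducesCycle
+// means folding would create a cycle, and CR_LeadsToInteriorNode means a user
+// is itself part of the pattern being matched.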
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(const SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ unsigned UserOpcode = User->getOpcode();
+ if (UserOpcode == ISD::CopyToReg ||
+ UserOpcode == ISD::CopyFromReg ||
+ UserOpcode == ISD::INLINEASM ||
+ UserOpcode == ISD::EH_LABEL ||
+ UserOpcode == ISD::LIFETIME_START ||
+ UserOpcode == ISD::LIFETIME_END) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain.  The
+/// input vector contains a list of all of the chained nodes that we match.  We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ SDValue Res;
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
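+  // Machine opcodes are stored in SDNodes as the bitwise complement of the
+  // target opcode, hence the ~TargetOpc here.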
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ // Otherwise, no replacement happened because the node already exists. Replace
+  // uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
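+  // Opcodes are emitted into the matcher table as two little-endian bytes.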
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node.  If the current predicate is known to
+/// fail, set Result=true and return anything.  If the current predicate is
+/// known to pass or cannot be evaluated here, set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+  /// HasChainNodesMatched/HasGlueResultNodesMatched - True if the
+  /// corresponding matched-nodes list was non-empty when the scope was formed.
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+}
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::RegisterMask:
+ //case ISD::VALUETYPE:
+ //case ISD::CONDCODE:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
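+    // The switch is laid out as a sequence of cases, each encoded as a
+    // VBR-encoded case size, a two-byte opcode, and the case body; a case
+    // size of zero terminates the list.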
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail,
+ // move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy();
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
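+      // The record number (0 or 1) is encoded directly in the opcode.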
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
+ }
+
+ } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ } else {
+ // NodeToMatch was eliminated by CSE when the target changed the DAG.
+ // We will visit the equivalent node later.
+ DEBUG(dbgs() << "Node was eliminated by CSE\n");
+ return 0;
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+ // FIXME: This is vastly incorrect for patterns with multiple outputs
+ // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkGlueResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the glue-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+        assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getName();
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..b752b482
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,299 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
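+// A typical invocation (illustrative; assumes execution is stopped where a
+// SelectionDAG is reachable, e.g. via SelectionDAGISel's CurDAG member):
+//   (gdb) call CurDAG->viewGraph()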
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (e.g. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (e.g. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+                        iend = SDNodeIterator::end(N);
+         i != iend; ++i) {
+      hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1,
+                                         printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::
+getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..f5fc66c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2593 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TargetLoweringBase(tm, tlof) {}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
+}
+
+
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
+
+/// softenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
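+  // For example, an f32 SETUGT has no single ordered libcall, so it
+  // expands to two calls whose results are OR'd together; with the default
+  // RTLIB names that is __unordsf2(a, b) != 0 || __gtsf2(a, b) > 0
+  // (targets may override both the names and the result conventions).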
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
+ }
+ }
+
+  // Use the target-specific return value for comparison lib calls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = { NewLHS, NewRHS };
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
+
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                             unsigned JTI,
+                                             MCContext &Ctx) const {
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+    // If the constant has any bits set outside the demanded mask, clear
+    // them and use the shrunken constant instead.
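+    // For example, with Demanded = 0x00FF, an AND with 0x13FF can be
+    // rewritten to use the constant 0x00FF; the cleared bits are never
+    // observed.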
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
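+  // For instance, on a target where i64->i32 truncation and i32->i64
+  // zero-extension are both free (x86-64 behaves this way), an i64 add
+  // whose users only demand the low 32 bits can be rewritten as
+  // (zext (add (trunc x), (trunc y))) so the add runs at i32.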
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned SmallVTBits = DemandedSize;
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ bool NeedZext = DemandedSize > SmallVTBits;
+ SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
+ dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in TLO.Old and TLO.New.
+/// Otherwise, analyze the expression and return a mask of KnownOne and
+/// KnownZero bits for the expression (used to simplify the caller). The
+/// KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+    // simplify the LHS; here we're using information from the LHS to
+    // simplify the RHS.
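+    // For example, if the low byte of the LHS is already known zero, an
+    // AND with 0xFFFF only needs the constant 0xFF00.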
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ // If the LHS already has zeros where RHSC does, this and is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
+ if (KnownOne == KnownOne2) { // set bits are the same on both sides
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+    // For XOR, we prefer to force bits to 1 if that will make a -1; if we
+    // can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+      // If we can expand it to have all bits set, do it.
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+        // If it already has all the bits set, there is nothing to change,
+        // but don't shrink the constant either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
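+      // For example, ((X >>u 8) << 3) with the low three result bits not
+      // demanded becomes (X >>u 5).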
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
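+      // For example, ((X << 8) >>u 3) with the top three result bits not
+      // demanded becomes (X << 5).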
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (NewMask == 1)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
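+    // For example, for i32 sext_inreg(x, i8) with only bit 31 demanded,
+    // (shl x, 24) moves the i8 sign bit (bit 7) directly to bit 31.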
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - ExVT.getScalarType().getSizeInBits());
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if ((NewBits & NewMask) == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit =
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ ExVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
+ } else { // Otherwise, top bits aren't known.
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
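+        // For example, (i8 trunc (i32 srl X, 4)) can become
+        // (i8 srl (i8 trunc X), 4) when the bits shifted in from the
+        // wider value are never demanded.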
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
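+    // This works because carries and partial products only propagate from
+    // low bits toward high bits; e.g. if only the low 8 bits of (add x, y)
+    // are demanded, only the low 8 bits of x and y can affect them.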
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
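+      // This works because (x & (x-1)) clears the lowest set bit of x, so
+      // it is zero exactly when x has at most one bit set.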
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
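+    // For example, ((i32 load X) & 0xFF00) == 0 on a little-endian target
+    // can typically be done with an i8 load at offset 1 compared against 0.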
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!getDataLayout()->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ false, false, false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT.getSimpleVT())==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(C1.trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+ break; // TODO: be more careful with signed comparisons.
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents(false) == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
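+ // This is sound because no value compares ordered-less-than -Inf or
+ // ordered-greater-than +Inf, so e.g. x == -Inf holds exactly when
+ // x <= -Inf does.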
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal = 0;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger()) {
+ return DAG.getConstant(EqVal, VT);
+ }
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(EqVal, VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(EqVal, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
+ DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0)
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode()))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
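+/// For example, (add (GlobalAddress @g, 4), (Constant 12)) returns true with
+/// GA == @g and adds 16 to Offset.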
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target-specific constant constraints.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? C->getDebugLoc() : DebugLoc(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ else if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(
+ 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+ } else {
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, 0 = so-so match, up to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then using the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this, the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
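+/// For example, given "imr" and a constant operand, rule 1 picks 'i'
+/// immediately; given "mr" and a non-constant, rule 2 compares generality
+/// (Memory > RegisterClass) and picks 'm'.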
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
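+ // Each iteration doubles the number of correct low-order bits of xn; e.g.
+ // for d == 7 in i8 the loop converges to xn == 0xB7 (183), since
+ // 7 * 183 == 1281 == 5 * 256 + 1, i.e. 1 (mod 256).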
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
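+ // For example, for an i32 divide by 3 this yields m == 0x55555556 and
+ // s == 0, so the quotient is mulhs(x, m) plus the sign-bit correction
+ // added at the end below.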
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
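+ // For example, for an i32 unsigned divide by 7 this yields
+ // m == 0x24924925 with a != 0 and s == 3, taking the NPQ fixup path
+ // below: q = mulhu(x, m); q = ((x - q) >> 1) + q; q >>= 2.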
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 0000000..f769b44
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getDataLayout()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..10f64c7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,452 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// A prototype runtime compatible with ShadowStackGC can be found in
+// runtime/GC/SemiSpace.cpp.
+//
+// In order to support this particular transformation, all stack roots are
+// allocated together on the stack. This allows a fully target-independent
+// stack map while introducing only minor runtime overhead.
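+// Concretely, each function prologue links a record of (roughly) the form
+//   struct StackEntry { StackEntry *Next; const FrameMap *Map; void *Roots[]; };
+// onto a global chain, so a collector can walk every frame's roots without
+// any target-specific stack walking.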
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
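+ /// Typical use is a simple loop (sketch):
+ ///   EscapeEnumerator EE(F);
+ ///   while (IRBuilder<> *B = EE.Next())
+ ///     /* insert cleanup code at B's insertion point */;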
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return' and 'resume' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape; only resume and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
+ Type::getInt32Ty(C), NULL);
+ Constant *PersFn =
+ F.getParent()->
+ getOrInsertFunction("__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
+ "cleanup.lpad",
+ CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args, CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI->getParent(), RI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
+ };
+
+ Type *EltTys[] = { DescriptorElts[0]->getType(),
+ DescriptorElts[1]->getType() };
+ StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices);
+}
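+
+// Illustrative sketch of the constant emitted above, assuming a function @f
+// with two roots where only the first carries metadata @meta (all names here
+// are hypothetical):
+//
+//   @__gc_f = internal constant %gc_map.1 {
+//     %gc_map { i32 2, i32 1 },                       ; NumRoots, NumMeta
+//     [1 x i8*] [i8* bitcast (%MD* @meta to i8*)] }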
+
+Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
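+
+// Illustrative sketch of the types and the chain global set up above (assuming
+// no llvm_gc_root_chain already exists in the module):
+//
+//   %gc_map = type { i32, i32 }
+//   %gc_stackentry = type { %gc_stackentry*, %gc_map* }
+//   @llvm_gc_root_chain = linkonce global %gc_stackentry* null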
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
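+
+// Illustrative sketch of the IR CollectRoots matches (hypothetical names); a
+// null metadata operand sends the pair to Roots, a non-null one to MetaRoots:
+//
+//   %root = alloca i8*
+//   call void @llvm.gcroot(i8** %root, i8* null)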
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0, 1, "gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
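+
+// Illustrative sketch of the result for a function @f with one root, using
+// the value names created above (types abbreviated):
+//
+//   entry:
+//     %gc_frame = alloca %gc_stackentry.f
+//     %gc_currhead = load @llvm_gc_root_chain
+//     ; gc_frame.map <- @__gc_f, gc_frame.next <- %gc_currhead
+//     store %gc_newhead, @llvm_gc_root_chain      ; push the new entry
+//   ...and on every escape path found by EscapeEnumerator:
+//     %gc_savedhead = load %gc_frame.next
+//     store %gc_savedhead, @llvm_gc_root_chain    ; pop the entry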
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..9ab4918
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getName().str();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save and restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ dbgs()
+ << "-----------------------------------------------------------\n";
+ }
+ });
+}
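+
+// Worked example (illustrative): for a diamond MCFG  A -> {B,C} -> D  where
+// only B uses CSR r1, the iteration converges to
+//   AnticIn[B] = {r1},  AnticOut[A] = AnticIn[B] & AnticIn[C] = {}
+//   AvailOut[B] = {r1}, AvailIn[D]  = AvailOut[B] & AvailOut[C] = {}
+// so neither the entry A nor the join D anticipates r1 or has it available,
+// and the spill/restore region stays confined to B (see calcSpillPlacements).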
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ dbgs() << "ENABLED: " << Fn.getName();
+ if (HasFastExitPath)
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry region.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
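+
+// Continuing the diamond example from calculateAnticAvail: CSRSave[B] =
+// (AnticIn[B] - AvailIn[B]) & (UsedCSRegs - AnticIn[A]) = {r1}, while
+// CSRSave[A] and CSRSave[C] remain empty, i.e. the spill lands only in the
+// block that actually uses r1.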
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry region.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
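+
+// Continuing the diamond example: CSRRestore[B] = (AvailOut[B] - AnticOut[B])
+// & (UsedCSRegs - AvailOut[D]) = {r1}, so the restore is placed in B as well
+// and the save/restore pair tightly brackets the use.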
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
+ << Fn.getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ dbgs() << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ dbgs() << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block directly to a return
+/// block, or with a very small number of edges on such a path.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DEBUG(dbgs() << MF->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DEBUG(dbgs() << MF->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getName().str();
+
+ std::ostringstream name;
+ name << "_MBB_" << MBB->getNumber();
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ dbgs() << '\n';
+ }
+
+ if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
+ dbgs() << " "
+ << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ });
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 0000000..3903743
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,502 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPrepare : public FunctionPass {
+ const TargetLoweringBase *TLI;
+ Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *CallSiteFn;
+ Constant *FuncCtxFn;
+ AllocaInst *FuncCtx;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
+ : FunctionPass(ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
+ void insertCallSiteStore(Instruction *I, int Number);
+ };
+} // end anonymous namespace
+
+char SjLjEHPrepare::ID = 0;
+
+// Public interface to the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
+ return new SjLjEHPrepare(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPrepare::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+ PersonalityFn = 0;
+
+ return true;
+}
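+
+// Illustrative sketch of the registration protocol these declarations support
+// (the calls themselves are emitted later, in setupEntryBlockAndCallSites;
+// names are abbreviated):
+//
+//   %fn_context = alloca { i8*, i32, [4 x i32], i8*, i8*, [5 x i8*] }
+//   call void @_Unwind_SjLj_Register(%fn_context)     ; at function entry
+//   ...
+//   call void @_Unwind_SjLj_Unregister(%fn_context)   ; on each return path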
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
+
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
+}
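+
+// Illustrative result of insertCallSiteStore(I, 1), with %fn_context standing
+// in for the FuncCtx alloca:
+//
+//   %call_site = getelementptr %fn_context, i32 0, i32 1
+//   store volatile i32 1, i32* %call_site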
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB)) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->getNumUses() == 0) return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
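+
+// Illustrative before/after (hypothetical names): a use such as
+//
+//   %exn = extractvalue { i8*, i32 } %lpad, 0
+//
+// is rewritten to use %exn_val loaded from the function context, and any
+// remaining direct uses of the landingpad value receive an aggregate rebuilt
+// from %exn_val and %exn_selector_val via two insertvalue instructions.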
+
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPrepare::
+setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ unsigned Align =
+ TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
+ FuncCtx =
+ new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
+
+ // Fill in the function context structure.
+ for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
+ LandingPadInst *LPI = LPads[I];
+ IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+
+ // Reference the __data field.
+ Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
+
+ // The exception values come back in context->__data[0].
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0,
+ "exception_gep");
+ Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
+
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1,
+ "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+
+ substituteLPadValues(LPI, ExnVal, SelVal);
+ }
+
+ // Personality function
+ IRBuilder<> Builder(EntryBB->getTerminator());
+ if (!PersonalityFn)
+ PersonalityFn = LPads[0]->getPersonalityFn();
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3,
+ "pers_fn_gep");
+ Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true);
+
+ // LSDA address
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
+ Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+ Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
+ ++AfterAllocaInsPt;
+
+ for (Function::arg_iterator
+ AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+      // so the source and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC =
+ new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(NC);
+
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with. We do
+ // this because the above replaceAllUsesWith() clobbered the operand, but
+ // we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
+/// edge and spill them.
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
+  // Scan the function looking for instructions with bad live ranges.
+ for (Function::iterator
+ BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator
+ II = BB->begin(), IIE = BB->end(); II != IIE; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*, 16> Users;
+ for (Value::use_iterator
+ UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ SmallPtrSet<BasicBlock*, 64> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ << UnwindBlock->getName() << "\n");
+ NeedsSpill = true;
+ break;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ DemoteRegToStack(*Inst, true);
+ ++NumSpilled;
+ }
+ }
+ }
+
+ // Go through the landing pads and remove any PHIs there.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode*, 8> PHIsToDemote;
+ for (BasicBlock::iterator
+ PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty()) continue;
+
+ // Demote the PHIs to the stack.
+ for (SmallPtrSet<PHINode*, 8>::iterator
+ I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ DemotePHIToStack(*I);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(UnwindBlock->begin());
+ }
+}
+
+/// setupEntryBlockAndCallSites - Set up the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
+ SmallSetVector<LandingPadInst*, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->isIntrinsic() &&
+ Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Invokes.push_back(II);
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+ BasicBlock *EntryBB = F.begin();
+ IRBuilder<> Builder(EntryBB->getTerminator());
+
+ // Get a reference to the jump buffer.
+ Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
+
+ // Save the frame pointer.
+ Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
+
+ Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+ Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
+
+ // Save the stack pointer.
+ Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
+
+ Val = Builder.CreateCall(StackAddrFn, "sp");
+ Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
+
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+ Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+ Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+  // Mark call instructions that aren't nounwind as no-action (call_site ==
+  // -1). Skip the entry block: no function context exists until after the
+  // setjmp there, so any unexpected exception unwinds directly to the
+  // caller's context, which is what we want anyway, and there is nothing to
+  // do here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!CI->doesNotThrow())
+ insertCallSiteStore(CI, -1);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1);
+ }
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
+ EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (BB == F.begin())
+ continue;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned I = 0, E = Returns.size(); I != E; ++I)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+
+ return true;
+}
+
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ bool Res = setupEntryBlockAndCallSites(F);
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 0000000..20049a8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,250 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ indexList.clear();
+ ileAllocator.Reset();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+  //   is the same one pointed to by the MI iterator.
+
+  // FIXME: This can be simplified. The mi2iMap, idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+
+  // Check that the list contains only the sentinel.
+ assert(indexList.empty() && "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ indexList.push_back(createEntry(0, index));
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = miItr;
+ if (mi->isDebugValue())
+ continue;
+
+ // Insert a store index for the instr.
+ indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block)));
+ }
+
+    // We insert one blank index entry between basic blocks.
+ indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(mf->print(dbgs(), this));
+
+ // And we're done!
+ return false;
+}
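+
+// Illustrative result of the numbering above for a function with a single
+// basic block of two instructions, assuming InstrDist == 16:
+//   0   entry; start of BB#0 (Slot_Block)
+//   16  first instruction
+//   32  second instruction
+//   48  blank entry between blocks; BB#0's range is [0B;48B)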
+
+void SlotIndexes::renumberIndexes() {
+ // Renumber updates the index of every element of the index list.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
+
+ unsigned index = 0;
+
+ for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+ I != E; ++I) {
+ I->setIndex(index);
+ index += SlotIndex::InstrDist;
+ }
+}
+
+// Renumber indexes locally after curItr was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+ IndexList::iterator startItr = prior(curItr);
+ unsigned index = startItr->getIndex();
+ do {
+ curItr->setIndex(index += Space);
+ ++curItr;
+ // If the next index is bigger, we have caught up.
+ } while (curItr != indexList.end() && curItr->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
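+
+// Standalone illustration (not part of the pass) of the catch-up renumbering
+// above, assuming InstrDist == 16: a new entry squeezed in after index 16
+// gets 16 + 8 = 24, and renumbering stops as soon as an existing index is
+// already big enough.
+#include <cstdio>
+#include <list>
+
+int main() {
+  unsigned Raw[] = { 0, 16, 16 /* newly inserted, needs a number */, 32, 48 };
+  std::list<unsigned> Idx(Raw, Raw + 5);
+  const unsigned Space = 16 / 2; // half the normal spacing
+  std::list<unsigned>::iterator Cur = Idx.begin();
+  ++Cur; ++Cur;                      // the freshly inserted entry
+  std::list<unsigned>::iterator Prev = Cur;
+  --Prev;
+  unsigned index = *Prev;
+  do {
+    *Cur = index += Space;
+    ++Cur;
+  } while (Cur != Idx.end() && *Cur <= index);
+  for (std::list<unsigned>::iterator I = Idx.begin(); I != Idx.end(); ++I)
+    std::printf("%u ", *I);          // prints: 0 16 24 32 48
+  return 0;
+}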
+
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+ // does the same thing.
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !hasIndex(End))
+ ++End;
+
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+  // by the includeStart flag. This is done so that we can iterate MIs in an MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry();
+ IndexList::iterator ListI = endIdx.listEntry();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end())
+ insertMachineInstrInMaps(MI);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SlotIndexes::dump() const {
+ for (IndexList::const_iterator itr = indexList.begin();
+ itr != indexList.end(); ++itr) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+#endif
+
+// Print a SlotIndex to a raw_ostream.
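+// The suffix letter encodes the slot within the instruction's position:
+// 'B'lock boundary, 'e'arly-clobber, 'r'egister, or 'd'ead.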
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << listEntry()->getIndex() << "Berd"[getSlot()];
+ else
+ os << "invalid";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 0000000..c5bbba3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,381 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
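+
+// A minimal standalone sketch (not part of the pass) of the update rule the
+// energy function above implies: each node moves to the sign of its bias plus
+// the weighted values of its neighbours, so E never increases. All weights
+// and biases below are made up.
+#include <cstdio>
+#include <utility>
+#include <vector>
+
+struct ToyNode {
+  float Bias, Value;
+  std::vector<std::pair<float, unsigned> > Links; // (weight, neighbour)
+};
+
+static bool toyUpdate(std::vector<ToyNode> &N, unsigned n) {
+  float Sum = N[n].Bias;
+  for (unsigned i = 0, e = N[n].Links.size(); i != e; ++i)
+    Sum += N[n].Links[i].first * N[N[n].Links[i].second].Value;
+  float Old = N[n].Value;
+  N[n].Value = Sum > 0 ? 1.0f : (Sum < 0 ? -1.0f : 0.0f);
+  return N[n].Value != Old;
+}
+
+int main() {
+  // Bundle 0 is biased towards a register; bundle 1 is undecided but linked
+  // to bundle 0 with weight 0.5. The positive preference propagates.
+  std::vector<ToyNode> N(2);
+  N[0].Bias = 1.0f; N[0].Links.push_back(std::make_pair(0.5f, 1u));
+  N[1].Bias = 0.0f; N[1].Links.push_back(std::make_pair(0.5f, 0u));
+  bool Changed;
+  do
+    Changed = toyUpdate(N, 0) | toyUpdate(N, 1);
+  while (Changed);
+  std::printf("%g %g\n", N[0].Value, N[1].Value); // prints: 1 1
+  return 0;
+}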
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+ /// block frequency estimates means that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Scale[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+  /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+  /// mustSpill - Return true if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Scale[0] = Scale[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+  /// the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequency.resize(mf.getNumBlockIDs());
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = LiveIntervals::getSpillWeight(true, false,
+ loops->getLoopDepth(I));
+ unsigned Num = I->getNumber();
+ BlockFrequency[Num] = Freq;
+ nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+ }
+
+ // Scales are reciprocal frequencies.
+ for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+ for (unsigned d = 0; d != 2; ++d)
+ if (nodes[i].Scale[d] > 0)
+ nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
+ // We never change the function.
+ return false;
+}
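+
+// Normalization example (made-up numbers): if blocks with frequencies 3 and 1
+// feed into a bundle, its Scale[0] becomes 1/4, so links from those blocks
+// get weights 3/4 and 1/4; they are commensurate with the outgoing side and
+// sum to at most 1 per direction, as Node::Links requires.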
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that 1/32 of the
+ // connected blocks need to be interested before we consider expanding the
+ // region through the bundle. This helps compile time by limiting the number
+ // of blocks visited and the number of links in the Hopfield network.
+ if (bundles->getBlocks(n).size() > 100)
+ nodes[n].Bias = -0.0625f;
+}
+
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// A bit is set in the ActiveNodes vector for each node activated.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I->Number);
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ 0, // PrefBoth
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+/// addPrefSpill - Same as addConstraints(PrefSpill)
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ float Freq = getBlockFrequency(*I);
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(*I, 0);
+ unsigned ob = bundles->getBundle(*I, 1);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(-Freq, 1);
+ nodes[ob].addBias(-Freq, 0);
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+ Linked.push_back(ib);
+ if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+ Linked.push_back(ob);
+ float Freq = getBlockFrequency(Number);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ Linked.clear();
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (!nodes[n].Links.empty())
+ Linked.push_back(n);
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// Only the nodes in the Linked member list need updating.
+void SpillPlacement::iterate() {
+ // First update the recently positive nodes. They have likely received new
+ // negative bias that will turn them off.
+ while (!RecentPositive.empty())
+ nodes[RecentPositive.pop_back_val()].update(nodes);
+
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ Linked.clear();
+ RecentPositive.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = 0;
+ return Perfect;
+}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 0000000..fc412f8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,156 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<float, 4> BlockFrequency;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+ /// equivalent to calling addConstraint with identical BlockConstraints with
+ /// Entry = Exit = PrefSpill, and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+ /// iterate - Update the network iteratively until convergence, or new bundles
+ /// are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
new file mode 100644
index 0000000..209792f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,194 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+ enum SpillerName { trivial, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+           cl::desc("Spiller to use: (default: trivial)"),
+ cl::Prefix,
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
+ clEnumValEnd),
+ cl::init(trivial));
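+
+// The spiller is selected on the command line, e.g. -spiller=trivial or
+// -spiller=inline (see the clEnumVal names above); cl::init makes the
+// trivial spiller the default when the option is absent.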
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+ MachineFunctionPass *pass;
+ MachineFunction *mf;
+ VirtRegMap *vrm;
+ LiveIntervals *lis;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
+ {
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
+ }
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding or
+ /// remat is attempted.
+ void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+ LiveInterval* li = &LRE.getParent();
+
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+ "Trying to spill a stack slot.");
+
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ // Iterate over reg uses/defs.
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ // Grab the use/def instr.
+ MachineInstr *mi = &*regItr;
+
+ DEBUG(dbgs() << " Processing " << *mi);
+
+ // Step regItr to the next use/def instr.
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ // Collect uses & defs for this instr.
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+ indices.push_back(i);
+ }
+
+ // Create a new vreg & interval for this instr.
+ LiveInterval *newLI = &LRE.create();
+ newLI->weight = HUGE_VALF;
+
+ // Update the reg operands & kill flags.
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ unsigned mopIdx = indices[i];
+ MachineOperand &mop = mi->getOperand(mopIdx);
+ mop.setReg(newLI->reg);
+ if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+ mop.setIsKill(true);
+ }
+ }
+ assert(hasUse || hasDef);
+
+ // Insert reload if necessary.
+ MachineBasicBlock::iterator miItr(mi);
+ if (hasUse) {
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
+ tri);
+ MachineInstr *loadInstr(prior(miItr));
+ SlotIndex loadIndex =
+ lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
+ SlotIndex endIndex = loadIndex.getNextIndex();
+ VNInfo *loadVNI =
+ newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ }
+
+ // Insert store if necessary.
+ if (hasDef) {
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
+ true, ss, trc, tri);
+ MachineInstr *storeInstr(llvm::next(miItr));
+ SlotIndex storeIndex =
+ lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
+ SlotIndex beginIndex = storeIndex.getPrevIndex();
+ VNInfo *storeVNI =
+ newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ }
+ }
+ }
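+
+  // Net effect on a single tied use+def instruction (pseudo machine code,
+  // made-up names; <ss#0> is the assigned stack slot):
+  //   before:  %vreg1 = ADD %vreg1, 4
+  //   after:   %vreg2 = LOAD <ss#0>      ; reload inserted before the use
+  //            %vreg2 = ADD %vreg2, 4
+  //            STORE %vreg2, <ss#0>      ; spill inserted after the def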
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
+
+ void spill(LiveRangeEdit &LRE) {
+ // Ignore spillIs - we don't use it.
+ trivialSpillEverywhere(LRE);
+ }
+};
+
+} // end anonymous namespace
+
+void Spiller::anchor() { }
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ switch (spillerOpt) {
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
+ }
+ llvm_unreachable("Invalid spiller optimization");
+}
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
new file mode 100644
index 0000000..b7d5bea
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -0,0 +1,47 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+ class LiveRangeEdit;
+ class MachineFunction;
+ class MachineFunctionPass;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ virtual void anchor();
+ public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 0000000..0a3818e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1432 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = 0;
+ DidRepairRange = false;
+}
+
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = MBBEnd;
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+    // There may not be a call instruction, in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
+ if (I->isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LSP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
+ return LSP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow splits before the call.
+ return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+ SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+ if (LSP == LIS.getMBBEndIdx(MBB))
+ return MBB->end();
+ return LIS.getInstructionFromIndex(LSP);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+  // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+ ++I)
+ if (!I.getOperand().isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
+ }
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
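+
+// Standalone illustration (not part of the pass) of the dedup step above,
+// with plain integers standing in for slot indexes: index/2 identifies the
+// instruction and index%2 the slot (0 = early clobber, 1 = register). After
+// sorting, std::unique keeps the first, i.e. smallest, slot per instruction.
+#include <algorithm>
+#include <cstdio>
+#include <vector>
+
+static bool toySameInstr(unsigned A, unsigned B) { return A / 2 == B / 2; }
+
+int main() {
+  unsigned Raw[] = { 5, 2, 4, 3 }; // instructions 1 and 2, both slots each
+  std::vector<unsigned> Use(Raw, Raw + 4);
+  std::sort(Use.begin(), Use.end());
+  Use.erase(std::unique(Use.begin(), Use.end(), toySameInstr), Use.end());
+  for (unsigned i = 0, e = Use.size(); i != e; ++i)
+    std::printf("%u ", Use[i]); // prints: 2 4 (the early-clobber slots)
+  return 0;
+}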
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+ // The range shouldn't end mid-block if there are no uses. This shouldn't
+ // happen.
+ if (LVI->end < Stop)
+ return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastInstr = LastStop;
+ break;
+ }
+
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastInstr = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
+ }
+
+ // A LiveRange that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
+ }
+
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+ return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(0),
+ OpenIdx(0),
+ SpillMode(SM_Partition),
+ RegAssign(Allocator)
+{}
+
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // Reset the LiveRangeCalc instances needed for this spill mode.
+ LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(0);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+#endif
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
+ ValueForcePair(VNI, false)));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
+ SlotIndex Def = OldVNI->def;
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
+ // No longer a simple mapping. Switch to a complex, non-forced mapping.
+ InsP.first->second = ValueForcePair();
+ }
+
+ // This is a complex mapping, add liveness for VNI.
+ SlotIndex Def = VNI->def;
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+
+ return VNI;
+}
+
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ VNInfo *VNI = VFP.getPointer();
+
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
+ return;
+ }
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(0, true);
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+ .getRegSlot();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ return defValue(RegIdx, ParentVNI, Def);
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->create();
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->create();
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ SA.getLastSplitPointIter(&MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Boundary.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
+ assert(MI && "No instruction at index");
+
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by LRCalc.extend().
+ if (ParentVNI)
+ forceRecompute(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = Edit->get(0);
+ DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ VNInfo *VNI = Copies[i];
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugValue());
+
+ DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LI->removeValNo(VNI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at VNI->def. We
+ // want to avoid calculating the live range of the source register if
+ // possible.
+ AssignI.find(Def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ } else {
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
+ DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
+ }
+ }
+}
+
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = UINT_MAX;
+
+ for (;;) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+ // MBB isn't in a loop, so it doesn't get any better. All dominators have a
+ // higher frequency by definition.
+ if (!Loop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth " << Depth << '\n');
+ }
+
+ // Leave loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
+
+void SplitEditor::hoistCopiesForSize() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = Edit->get(0);
+ LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+ // None dominate. Hoist to common dominator, need new def.
+ Dom = DomPair(Near, SlotIndex());
+ }
+
+ DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
+ << " for parent " << ParentVNI->id << '@' << ParentVNI->def
+ << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << Dom.second << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
+ Dom.second =
+ defFromParent(0, ParentVNI, Last, *Dom.first,
+ SA.getLastSplitPointIter(Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def)
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, ParentVNI);
+ }
+ removeBackCopies(BackCopies);
+}
+
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need LRCalc.extend().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+ ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+ DEBUG(dbgs() << " blit " << *ParentI << ':');
+ VNInfo *ParentVNI = ParentI->valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = ParentI->start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = ParentI->end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Check for a simply defined value that can be blitted directly.
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LI->addRange(LiveRange(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ DEBUG(dbgs() << "(recalc)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex BlockStart, BlockEnd;
+ tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End)
+ LRC.setLiveOutValue(MBB, VNI);
+
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd)
+ LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
+ } else {
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
+ if (End < BlockEnd)
+ LRC.addLiveInBlock(LI, MDT[MBB], End);
+ else {
+ // Live-through, and we don't know the value.
+ LRC.addLiveInBlock(LI, MDT[MBB]);
+ LRC.setLiveOutValue(MBB, 0);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != ParentI->end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ LRCalc[0].calculateValues();
+ if (SpillMode)
+ LRCalc[1].calculateValues();
+
+ return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(LastUse)) {
+ assert(RegAssign.lookup(LastUse) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LRC.extend(LI, End);
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (MO.isDef() || MO.isUndef())
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ LiveInterval *LI = Edit->get(RegIdx);
+ MO.setReg(LI->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges || MO.isUndef())
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getRegSlot(true);
+
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the dead slot.
+ if (LII->end != LII->valno->def.getDeadSlot())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
+
+ // Force rematted values to be recomputed everywhere.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ forceRecompute(i, ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ hoistCopiesForSize();
+ break;
+ case SM_Speed:
+ llvm_unreachable("Spill mode 'speed' not implemented yet");
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit->get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned j = 1; j != NumComp; ++j)
+ dups.push_back(&Edit->create());
+ ConEQ.Distribute(&dups[0], MRI);
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
+ return false;
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastInstr);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support global live range splitting, where a global algorithm
+// decides which interval to use across each CFG edge. They insert split points
+// and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
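+//
+// For example, a driver that decided interval 1 should be live on entry to a
+// live-through block and interval 2 on exit would call (an illustrative
+// sketch; the interval indexes and interference indexes are placeholders):
+//
+//   SE.splitLiveThroughBlock(MBBNum, 1, LeaveBefore, 2, EnterAfter);
+//
+// Blocks entered in a register but spilled on exit use splitRegInBlock, and
+// the symmetric case uses splitRegOutBlock.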
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastInstr);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastInstr < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastInstr);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstInstr, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
+ useIntv(From, Idx);
+}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 0000000..4005a3d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,469 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
+ }
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
+ // Summarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
+ }
+
+ /// getLastSplitPointIter - Returns the last split point as an iterator.
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+ /// This includes both use and def operands, with at most one entry per
+ /// instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
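+
+ // A typical client loops over the use blocks and isolates each one that
+ // qualifies (an illustrative sketch; SA and SE are a SplitAnalysis and a
+ // SplitEditor set up for the same live interval):
+ //
+ //   ArrayRef<SplitAnalysis::BlockInfo> Blocks = SA.getUseBlocks();
+ //   for (unsigned i = 0; i != Blocks.size(); ++i)
+ //     if (SA.shouldSplitSingleBlock(Blocks[i], false))
+ //       SE.splitSingleBlock(Blocks[i]);
+ //   SE.finish();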
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*.
+/// - Mark the ranges where the new interval is used with useIntv*.
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Repeat from openIntv for each additional interval.
+/// - Rewrite instructions with finish().
+///
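+/// A minimal sequence for isolating the uses in one gap might look like this
+/// (an illustrative sketch, not code from this file; SE is a SplitEditor that
+/// has been prepared by reset() with a LiveRangeEdit for the parent interval):
+///
+///   SE.openIntv();
+///   SlotIndex Start = SE.enterIntvBefore(FirstUse);
+///   SlotIndex Stop = SE.leaveIntvAfter(LastUse);
+///   SE.useIntv(Start, Stop);
+///   SE.finish();
+///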
+class SplitEditor {
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+public:
+
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
+ enum ComplementSpillMode {
+ /// SM_Partition (Default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
+
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
+
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
+
+private:
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+ typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true) - as above, but the ranges in RegAssign are too large,
+ /// and the live range must be recomputed using LiveRangeCalc::extend().
+ /// 4. (VNI, false) - the value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveRangeCalc instance for the complement interval when in spill mode.
+ LiveRangeCalc LRCalc[2];
+
+ /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LRCalc instance. When not in spill mode, all intervals can share one.
+ LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+ return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// removeBackCopies - Remove the copy instructions that define the values
+ /// in the given vector from the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+ /// findShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
+
+ /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+ /// way that minimizes code size. This implements the SM_Size spill mode.
+ void hoistCopiesForSize();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ MachineDominatorTree&);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use OpenLI.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..a789a25
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,802 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+// offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
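+// As a concrete example of the markers this pass consumes, consider two
+// allocas with disjoint lifetimes (a hypothetical IR sketch, not taken from
+// the tree):
+//
+//   %a = alloca [512 x i8]
+//   %b = alloca [512 x i8]
+//   %ap = bitcast [512 x i8]* %a to i8*
+//   %bp = bitcast [512 x i8]* %b to i8*
+//   call void @llvm.lifetime.start(i64 512, i8* %ap)
+//   ; ... uses of %a ...
+//   call void @llvm.lifetime.end(i64 512, i8* %ap)
+//   call void @llvm.lifetime.start(i64 512, i8* %bp)
+//   ; ... uses of %b ...
+//   call void @llvm.lifetime.end(i64 512, i8* %bp)
+//
+// Because the marked lifetimes are disjoint, %a and %b may be assigned the
+// same frame index, saving 512 bytes of stack space.
+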
+#define DEBUG_TYPE "stackcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
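+///
+/// For example (illustrative user code that breaks its lifetime zone):
+///
+///   char *f() {
+///     char buf[32];  // the lifetime zone covers the scope of 'buf'
+///     return buf;    // the address escapes; uses may follow LIFETIME_END
+///   }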
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region.");
+
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+ MachineFrameInfo *MFI;
+ MachineFunction *MF;
+
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+ /// Which slots BEGIN in each basic block.
+ BitVector Begin;
+ /// Which slots END in each basic block.
+ BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+ /// Maps active slots (per bit) for each basic block.
+ typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+ /// Maps each basic block to its serial number.
+ DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+ /// Maps serial numbers back to basic blocks.
+ SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+
+ /// Liveness intervals for each stack slot.
+ SmallVector<LiveInterval*, 16> Intervals;
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+ /// SlotIndex analysis object.
+ SlotIndexes *Indexes;
+
+ /// The list of lifetime markers found. These markers are to be removed
+ /// once the coloring is done.
+ SmallVector<MachineInstr*, 8> Markers;
+
+ /// SlotSizeSorter - A sort utility for arranging stack slots according
+ /// to their size.
+ struct SlotSizeSorter {
+ MachineFrameInfo *MFI;
+ SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+ bool operator()(int LHS, int RHS) {
+ // We use -1 to denote an uninteresting slot. Place these slots at the end.
+ if (LHS == -1) return false;
+ if (RHS == -1) return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ }
+};
+
+public:
+ static char ID;
+ StackColoring() : MachineFunctionPass(ID) {
+ initializeStackColoringPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ /// Debug.
+ void dump() const;
+
+ /// Removes all of the lifetime marker instructions from the function.
+ /// \returns true if any markers were removed.
+ bool removeAllMarkers();
+
+ /// Scan the machine function and find all of the lifetime markers.
+ /// Record the findings in the BEGIN and END vectors.
+ /// \returns the number of markers found.
+ unsigned collectMarkers(unsigned NumSlot);
+
+ /// Perform the dataflow calculation and calculate the lifetime for each of
+ /// the slots, based on the BEGIN/END vectors. Set the LiveIn and LiveOut
+ /// bitvectors that represent which stack slots are live coming into and
+ /// out of each basic block.
+ void calculateLocalLiveness();
+
+ /// Construct the LiveIntervals for the slots.
+ void calculateLiveIntervals(unsigned NumSlots);
+
+ /// Go over the machine function and change instructions which use stack
+ /// slots to use the merged slots.
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+ /// The input program may contain instructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
+ /// Map entries which point to other entries to their destination.
+ /// A->B->C becomes A->C.
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() const {
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+ DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
+ " ["<<FI->getName()<<"]\n");
+
+ LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
+ DEBUG(dbgs()<<"BEGIN : {");
+ for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"END : {");
+ for (unsigned i=0; i < BlockInfo.End.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"LIVE_IN: {");
+ for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+ DEBUG(dbgs()<<"LIVEOUT: {");
+ for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+ }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+ unsigned MarkersFound = 0;
+ // Scan the function to find all lifetime markers.
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
+ // deterministic numbering, and because we'll need a post-order iteration
+ // later for solving the liveness dataflow problem.
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+
+ // Assign a serial number to this basic block.
+ BasicBlocks[*FI] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(*FI);
+
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
+
+ for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+ BI != BE; ++BI) {
+
+ if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+ BI->getOpcode() != TargetOpcode::LIFETIME_END)
+ continue;
+
+ Markers.push_back(BI);
+
+ bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+ const MachineOperand &MO = BI->getOperand(0);
+ unsigned Slot = MO.getIndex();
+
+ MarkersFound++;
+
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+ " with allocation: "<< Allocation->getName()<<"\n");
+ }
+
+ if (IsStart) {
+ BlockInfo.Begin.set(Slot);
+ } else {
+ if (BlockInfo.Begin.test(Slot)) {
+ // Allocas that start and end within a single block are handled
+ // specially when computing the LiveIntervals to avoid pessimizing
+ // the liveness propagation.
+ BlockInfo.Begin.reset(Slot);
+ } else {
+ BlockInfo.End.set(Slot);
+ }
+ }
+ }
+ }
+
+ // Update statistics.
+ NumMarkerSeen += MarkersFound;
+ return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ // Perform a standard reverse dataflow computation to solve for
+ // global liveness. The BEGIN set here is equivalent to KILL in the standard
+ // formulation, and END is equivalent to GEN. The result of this computation
+ // is a map from blocks to bitvectors where the bitvectors represent which
+ // allocas are live in/out of that block.
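+ //
+ // In dataflow terms, the fixed point computed below is, for each block B:
+ //
+ //   LiveIn(B)  = (union of LiveOut(P) over preds P | End(B))   - Begin(B)
+ //   LiveOut(B) = (union of LiveIn(S) over succs S  | Begin(B)) - End(B)
+ //
+ // after which LiveIn and LiveOut are unioned with each other, so that a
+ // slot live on either side of a block is treated as live through it.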
+ SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+ BasicBlockNumbering.end());
+ unsigned NumSSMIters = 0;
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ ++NumSSMIters;
+
+ SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
+
+ for (SmallVector<const MachineBasicBlock*, 8>::iterator
+ PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+ PI != PE; ++PI) {
+
+ const MachineBasicBlock *BB = *PI;
+ if (!BBSet.count(BB)) continue;
+
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
+ BitVector LocalLiveIn;
+ BitVector LocalLiveOut;
+
+ // Forward propagation from begins to ends.
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+ LocalLiveIn |= BlockInfo.End;
+ LocalLiveIn.reset(BlockInfo.Begin);
+
+ // Reverse propagation from ends to begins.
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*SI);
+ assert(I != BlockLiveness.end() && "Successor not found");
+ LocalLiveOut |= I->second.LiveIn;
+ }
+ LocalLiveOut |= BlockInfo.Begin;
+ LocalLiveOut.reset(BlockInfo.End);
+
+ LocalLiveIn |= LocalLiveOut;
+ LocalLiveOut |= LocalLiveIn;
+
+ // After adopting the live bits, we need to turn off the bits which
+ // are deactivated in this block.
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveIn.reset(BlockInfo.Begin);
+
+ // If we have both BEGIN and END markers in the same basic block then
+ // we know that the BEGIN marker comes after the END, because we already
+ // handle the case where the BEGIN comes before the END when collecting
+ // the markers (and building the BEGIN/END vectors).
+ // We want to enable the LIVE_IN and LIVE_OUT of slots that have both
+ // BEGIN and END because it means that the value lives before and after
+ // this basic block.
+ BitVector LocalEndBegin = BlockInfo.End;
+ LocalEndBegin &= BlockInfo.Begin;
+ LocalLiveIn |= LocalEndBegin;
+ LocalLiveOut |= LocalEndBegin;
+
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI)
+ NextBBSet.insert(*PI);
+ }
+
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ NextBBSet.insert(*SI);
+ }
+ }
+
+ BBSet = NextBBSet;
+ }// while changed.
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+ SmallVector<SlotIndex, 16> Finishes;
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
+ MBB != MBBe; ++MBB) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+ Finishes.clear();
+ Finishes.resize(NumSlots);
+
+ // Collect the start/end points from the lifetime markers in this block.
+ for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(),
+ e = Markers.end(); it != e; ++it) {
+ const MachineInstr *MI = *it;
+ if (MI->getParent() != MBB)
+ continue;
+
+ assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Invalid Lifetime marker");
+
+ bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
+ const MachineOperand &Mo = MI->getOperand(0);
+ int Slot = Mo.getIndex();
+ assert(Slot >= 0 && "Invalid slot");
+
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+
+ if (IsStart) {
+ if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+ Finishes[Slot] = ThisIndex;
+ }
+ }
+
+ // Cover the whole block for the slots that we previously found to be 'alive'.
+ BitVector Alive = BlockLiveness[MBB].LiveIn;
+ Alive |= BlockLiveness[MBB].LiveOut;
+
+ if (Alive.any()) {
+ for (int pos = Alive.find_first(); pos != -1;
+ pos = Alive.find_next(pos)) {
+ if (!Starts[pos].isValid())
+ Starts[pos] = Indexes->getMBBStartIdx(MBB);
+ if (!Finishes[pos].isValid())
+ Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+ }
+ }
+
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+ if (!Starts[i].isValid())
+ continue;
+
+ assert(Starts[i] && Finishes[i] && "Invalid interval");
+ VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+ SlotIndex S = Starts[i];
+ SlotIndex F = Finishes[i];
+ if (S < F) {
+ // We have a single consecutive region.
+ Intervals[i]->addRange(LiveRange(S, F, ValNum));
+ } else {
+ // We have two non-consecutive regions. This happens when
+ // LIFETIME_START appears after the LIFETIME_END marker.
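+ // The slot is live from the start of the block up to the END marker,
+ // and again from the START marker to the end of the block:
+ //
+ //   [MBBStart, F)  and  [S, MBBEnd)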
+ SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
+ SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+ Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
+ Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+ }
+ }
+ }
+}
+
+bool StackColoring::removeAllMarkers() {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < Markers.size(); ++i) {
+ Markers[i]->eraseFromParent();
+ Count++;
+ }
+ Markers.clear();
+
+ DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+ return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+ unsigned FixedInstr = 0;
+ unsigned FixedMemOp = 0;
+ unsigned FixedDbg = 0;
+ MachineModuleInfo *MMI = &MF->getMMI();
+
+ // Remap debug information that refers to stack slots.
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ std::pair<unsigned, DebugLoc> &VP = VI->second;
+ if (SlotRemap.count(VP.first)) {
+ DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+ VP.first = SlotRemap[VP.first];
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+ for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
+ e = SlotRemap.end(); it != e; ++it) {
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+ }
+
+ // Remap all instructions to the new stack slots.
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ // Skip lifetime markers. We'll remove them soon.
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END)
+ continue;
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+ E = I->memoperands_end(); MM != E; ++MM) {
+ MachineMemOperand *MMO = *MM;
+
+ const Value *V = MMO->getValue();
+
+ if (!V)
+ continue;
+
+ // Climb up and find the original alloca.
+ V = GetUnderlyingObject(V);
+ // If we did not find one, or if the one that we found is not in our
+ // map, then move on.
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI = cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
+ continue;
+
+ MMO->setValue(Allocas[AI]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+ // zone are okay, despite the fact that we don't have a good way
+ // to validate all of the usages of the calculation.
+#ifndef NDEBUG
+ bool TouchesMemory = I->mayLoad() || I->mayStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ LiveInterval *Interval = Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
+ "Found instruction usage outside of live range.");
+ }
+#endif
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+ }
+
+ DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+ MachineFunction::const_iterator BB, BBE;
+ MachineBasicBlock::const_iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+ continue;
+
+ // Some intervals are suspicious! In some cases we find address
+ // calculations outside of the lifetime zone, but no actual memory
+ // read or write. Memory accesses outside of the lifetime zone are a clear
+ // violation, but address calculations are okay. This can happen when
+ // GEPs are hoisted outside of the lifetime zone.
+ // So, in here we only check instructions which can read or write memory.
+ if (!I->mayLoad() && !I->mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot<0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Intervals[Slot]->clear();
+ DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Expunge slot remap map.
+ for (unsigned i=0; i < NumSlots; ++i) {
+ // If slot 'i' is mapped to another slot, follow the chain of mappings.
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: "
+ << ((const Value*)Func.getFunction())->getName() << '\n');
+ MF = &Func;
+ MFI = MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+ DEBUG(dbgs()<<"Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+ // Don't continue if there are not enough lifetime markers, the stack is
+ // too small, or we were told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ LiveInterval *LI = new LiveInterval(i, 0);
+ Intervals.push_back(LI);
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+ // for disjoint slots. When you find disjoint slots, merge the smaller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ SlotSizeSorter(MFI));
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=I+1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = Intervals[FirstSlot];
+ LiveInterval *Second = Intervals[SecondSlot];
+ assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots.
+ if (!First->overlaps(*Second)) {
+ Changed = true;
+ First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+ SecondSlot<<" together.\n");
+ unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+ MFI->getObjectAlignment(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots+=1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ }// While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+ ReducedSize<<" bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+
+ // Release the intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ delete Intervals[I];
+ }
+
+ return removeAllMarkers();
+}
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 0000000..fbef347
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,370 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
+namespace {
+ class StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLoweringBase *TLI;
+
+ Function *F;
+ Module *M;
+
+ DominatorTree *DT;
+
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's reference has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode*, 16> VisitedPHIs;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool Strong = false,
+ bool InStruct = false) const;
+
+ /// \brief Check whether a stack allocation has its address taken.
+ bool HasAddressTaken(const Instruction *AI);
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector();
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+ StackProtector(const TargetLoweringBase *tli)
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ if (!RequiresStackProtector()) return false;
+
+ ++NumFunProtected;
+ return InsertStackProtectors();
+}
+
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+ bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ // In strong mode any array, regardless of type and size, triggers a
+ // protector.
+ if (Strong)
+ return true;
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has at least SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, Strong, true))
+ return true;
+
+ return false;
+}
+
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AI == SI->getValueOperand())
+ return true;
+ } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+ if (AI == SI->getOperand(0))
+ return true;
+ } else if (isa<CallInst>(U)) {
+ return true;
+ } else if (isa<InvokeInst>(U)) {
+ return true;
+ } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ if (HasAddressTaken(SI))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ if (VisitedPHIs.insert(PN))
+ if (HasAddressTaken(PN))
+ return true;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (HasAddressTaken(GEP))
+ return true;
+ } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (HasAddressTaken(BI))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic will add a guard variable to functions that call
+/// alloca with either a variable size or a size >= SSPBufferSize, functions
+/// with character buffers larger than SSPBufferSize, and functions with
+/// aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic will add a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
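+///
+/// For example, under the standard heuristic a function such as
+///
+///   void f() { char buf[16]; read_into(buf); }
+///
+/// gets a protector once the buffer size reaches SSPBufferSize (8 bytes by
+/// default); 'read_into' is just an illustrative callee.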
+bool StackProtector::RequiresStackProtector() {
+ bool Strong = false;
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq))
+ return true;
+ else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Strong = true;
+ else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect))
+ return false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation()) {
+ // SSP-Strong: Enable protectors for any call to alloca, regardless
+ // of size.
+ if (Strong)
+ return true;
+
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+ if (CI->getLimitedValue(BufferSize) >= BufferSize)
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ return true;
+ } else // A call to alloca with a variable size requires protectors.
+ return true;
+ }
+
+ if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
+ return true;
+
+ if (Strong && HasAddressTaken(AI)) {
+ ++NumAddrTaken;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ Args, "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
+ }
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ if (DT && FailBBDom)
+ DT->addNewBlock(FailBB, FailBBDom);
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
+ return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..f951561
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,439 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
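+// Spill slots whose live ranges do not overlap can share a single slot
+// ("color"). For example, if fi#1 and fi#2 are never live at the same time,
+// every reference to fi#2 can be rewritten to use fi#1 and fi#2 deleted,
+// shrinking the stack frame.
+//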
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackslotcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ LiveStacks* LS;
+ MachineFrameInfo *MFI;
+ const TargetInstrInfo *TII;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+ // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locates spill slots with smaller indices
+ // closest to the stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(ID), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+
+namespace {
+ // IntervalSorter - Comparison predicate that sorts live intervals by
+ // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI->isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot live intervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS))
+ continue;
+
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions, however all
+ // instructions using OldFI are being rewritten to use NewFI.
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
+}
+
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
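+///
+/// An illustrative pattern (pseudo machine code):
+///
+///   %r = LOAD <fi#4>
+///   STORE %r, <fi#4>   ; stores back the value just loaded: the store is dead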
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
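+ // If the store also kills the loaded register, the value has no further
+ // uses and the load can be removed as well.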
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ MFI = MF.getFrameInfo();
+ TII = MF.getTarget().getInstrInfo();
+ LS = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..b337c53
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,825 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
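+//
+// For example, for a PHI such as
+//
+//   %c = PHI [ %a, <BB#0> ], [ %b, <BB#1> ]
+//
+// a naive lowering inserts '%c = COPY %a' at the end of BB#0 and
+// '%c = COPY %b' at the end of BB#1; this pass coalesces such copies away
+// whenever the registers involved can be shown not to interfere.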
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure: we steal two bits from the parent
+ /// pointer. One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
+ };
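+
+ /// A small usage sketch (illustrative only, not part of the pass): the flag
+ /// bits ride alongside the parent pointer in the PointerIntPair, e.g.
+ ///
+ /// Node N(VReg);
+ /// N.parent.setInt(N.parent.getInt() | Node::kRegisterIsolatedFlag);
+ /// bool Isolated = N.parent.getInt() & Node::kRegisterIsolatedFlag;
+ ///
+ /// so isolating a register or PHI never disturbs the pointer payload.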
+
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
+
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
+
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ /// Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ /// Lowers a PHI instruction, inserting copies of the source and destination
+ /// registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ /// Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ /// everywhere that Reg appears. Requires Reg and NewReg to have
+ /// non-overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a PHI instruction and the virtual register
+ // operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LIS) : LI(LIS) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+ }
+
+ LiveIntervals *LI;
+ };
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert(RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
+ }
+ }
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+ }
+ }
+ }
+
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
+
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
+ }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+ }
+ }
+ }
+
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
+ }
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
+ }
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed not to overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
+ }
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
+ }
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
+ }
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ Node *&N = RegNodeMap[Reg];
+ if (!N)
+ N = new (Allocator) Node(Reg);
+}
+
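+// Find the leader of this node's congruence class. While walking up, each
+// visited node is re-pointed at its grandparent (a path-halving variant), which
+// keeps the union-find trees shallow without a separate compression pass.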
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
+ }
+
+ return Parent;
+}
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
+ }
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
+ }
+ return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
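+///
+/// As an illustration (register names hypothetical): if r1, r2 and r3 share a
+/// color and def(r1) dom def(r2) dom def(r3), then while r3 is the current
+/// dominating parent the maps contain
+///
+/// CurrentDominatingParent[color] = r3
+/// ImmediateDominatingParent[r3] = r2
+/// ImmediateDominatingParent[r2] = r1
+///
+/// and moving to a sibling branch where only def(r1) dominates pops r3 and r2
+/// by following ImmediateDominatingParent until a dominating def is found.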
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
+ } else {
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
+ }
+ }
+ }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because the use of a PHI's operands are logically contained in
+ // the predecessor block. The def of a PHI's destination register is processed
+ // along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
+ }
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
+ }
+ }
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
+ assert(SrcVNI);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
+ } else {
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
+ }
+
+ // Insert into map if not already there.
+ InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
+ CopyInstr));
+ }
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
+ }
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
+ assert(DestVNI);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getRegSlot(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ LI->getVNInfoAllocator());
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to account for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getRegSlot();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN)
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
+}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 0000000..1ec8817
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,970 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
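+//
+// Sketch of the transformation (illustrative only): predecessors A and B each
+// end in an unconditional branch to TailBB, which ends in a branch to X.
+// After duplication, A and B each get their own copy of TailBB's body followed
+// by a branch to X, and TailBB itself is removed once it has no predecessors.
+// PHI nodes in TailBB become copies in the duplicating predecessors, and SSA
+// form is repaired afterwards with MachineSSAUpdater.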
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumTails , "Number of tails duplicated");
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs , "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
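+// For each virtual register that needs SSA repair, records the predecessor
+// blocks in which a new definition was created, paired with the new virtual
+// register defined there; these become available values for MachineSSAUpdater.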
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+ OwningPtr<RegScavenger> RS;
+ bool PreRegAlloc;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass() :
+ MachineFunctionPass(ID), PreRegAlloc(false) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &UsedByPhi,
+ bool Remove);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
+ false, false)
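+
+// Illustrative usage (not part of this file): target pass pipelines schedule
+// this pass by its ID rather than constructing it directly, roughly:
+//
+// addPass(&TailDuplicateID); // inside a TargetPassConfig-derived pipeline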
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ PreRegAlloc = MRI->isSSA();
+ RS.reset();
+ if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS.reset(new RegScavenger());
+
+ bool MadeChange = false;
+ while (TailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// TailDuplicateAndUpdate - Tail duplicate the block and clean up.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
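+// Returns the index of the PHI operand whose value flows in from SrcBB, or 0
+// if SrcBB is not a source of this PHI. Zero is a safe sentinel because
+// operand 0 is the PHI's def.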
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update the
+/// SSA update map.
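+///
+/// For instance (vreg numbers hypothetical), given
+/// %vreg5<def> = PHI %vreg1, <BB#1>, %vreg2, <BB#2>
+/// and PredBB == BB#1, this records a pending copy of %vreg1 into a fresh
+/// vreg for the caller to emit in BB#1, and maps vreg5 to vreg1 in LocalVRMap.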
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Insert a copy from the source register at the end of the block. The new
+ // def register is the available value live out of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end()) {
+ MO.setReg(VI->second);
+ MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
+ }
+ }
+ }
+ PredBB->insert(PredBB->instr_end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument
+ // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(SrcReg).addMBB(SrcBB);
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(Reg).addMBB(SrcBB);
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+bool
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one instruction, because a single branch instruction can be
+ // eliminated to compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->isNotDuplicable())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation, so duplicating them may end up
+ // increasing spills.
+ if (PreRegAlloc && I->isCall())
+ return false;
+
+ if (!I->isPHI() && !I->isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ const SmallPtrSet<MachineBasicBlock*, 8> &SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallVector<MachineBasicBlock*, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
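+ // Illustrative walk-through: if PredBB ends in a conditional branch to
+ // TailBB and falls through to NextBB, AnalyzeBranch yields PredTBB == TailBB
+ // and PredFBB == NULL. The steps below make the fall through explicit,
+ // retarget TailBB to NewTarget, and then drop destinations that are simply
+ // fall-throughs before re-emitting the branch.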
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect branch destinations that point at TailBB to NewTarget.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible.
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ if (RS && !TailBB->livein_empty()) {
+ // Update PredBB livein.
+ RS->enterBasicBlock(PredBB);
+ if (!PredBB->empty())
+ RS->forward(prior(PredBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
+ E = TailBB->livein_end(); I != E; ++I) {
+ // If a register was live in to the tail block but is not live at the
+ // end of the predecessor BB, it must be added to the predecessor's
+ // livein list.
+ if (!RegsLiveAtExit[*I])
+ PredBB->addLiveIn(*I);
+ }
+ }
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+
+ // Re-analyze with AllowModify set so AnalyzeBranch can simplify the branch.
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+  // If we made no changes so far, there are no PHI constraints to repair.
+  if (!Changed)
+    return Changed;
+
+  // Handle the nasty case in which we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
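+  // For example (illustrative only, hypothetical registers): if 2 contains
+  //   %v = PHI [ %a, 1 ], [ %b, 3 ]
+  // and 2 is duplicated into 1 only, block 3 additionally receives
+  //   %vnew = COPY %b
+  // so the duplicated value has a definition reaching 3, while the PHI in 2
+  // keeps its 3 -> 2 operand.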
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+    // Skip blocks whose extra successor edges are EH edges, which
+    // AnalyzeBranch ignores.
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 0000000..883e9d1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,44 @@
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
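+
+// Worked example (editorial illustration, hypothetical values): with
+// ObjectOffset(FI) = -8, StackSize = 32, LocalAreaOffset = -4 and
+// OffsetAdjustment = 0, the default offset from the frame register is
+//   -8 + 32 - (-4) + 0 = 28 bytes.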
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
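+
+// Illustrative use (hypothetical caller, not part of this change): code
+// emitting a frame-relative address would typically do
+//   unsigned FrameReg;
+//   int Offset = TFI->getFrameIndexReference(MF, FI, FrameReg);
+// and then materialize the address as FrameReg + Offset.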
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 0000000..20eb918
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,739 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden by the target to account for them.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
+      atInsnStart = true;
+    // Check for a comment first, so that comment text is not counted as an
+    // instruction below.
+    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+                               strlen(MAI.getCommentString())) == 0)
+      atInsnStart = false;
+    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+      Length += MAI.getMaxInstLength();
+      atInsnStart = false;
+    }
+  }
+
+ return Length;
+}
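+
+// Illustrative sketch (hypothetical MCAsmInfo: MaxInstLength = 4, separator
+// ";", comment string "@"): the string "nop; nop\n@ note\nadd r0, r0, r1"
+// contains three instructions, so the estimate is 3 * 4 = 12 bytes; the
+// comment line contributes nothing.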
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+  // If NewDest isn't the layout successor of MBB, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+    // No idea how to commute this instruction; the target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ // If destination is tied to either of the commuted source register, then
+ // it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ }
+
+ if (HasDef) {
+ MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
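+
+// Illustrative example (hypothetical virtual registers): commuting
+//   %v0<def> = MUL %v1<kill>, %v2
+// yields
+//   %v0<def> = MUL %v2, %v1<kill>
+// i.e. the registers, subregister indices and kill flags of the two source
+// operands are exchanged, in place or on a clone when NewMI is set.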
+
+/// findCommutedOpIndices - If the specified MI is commutable, return the two
+/// operand indices whose values would be swapped. Return false if the
+/// instruction is not in a form which this routine understands.
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = MCID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
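+
+// Illustrative example: for a typical three-operand commutable instruction
+// (one def followed by two register sources), MCID.getNumDefs() is 1, so the
+// reported indices are SrcOpIdx1 = 1 and SrcOpIdx2 = 2.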
+
+
+bool
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
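+
+// Illustrative sketch (ARM-style predication, hypothetical operands):
+// predicating "ADD r0, r1, r2" with Pred = { <EQ imm>, <CPSR reg> } rewrites
+// the instruction's predicate operands in place, so it behaves like
+// "ADDEQ r0, r1, r2".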
+
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
+bool
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
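+
+// Illustrative example (hypothetical vregs): for "%v0<def> = COPY %v1" with
+// both registers in compatible 32-bit GPR classes, canFoldCopy(MI, 0)
+// returns the class to use for the stack slot; a subregister index on either
+// operand makes the copy unfoldable and 0 is returned.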
+
+bool TargetInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // Ask the target to do the actual folding.
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
+}
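+
+// Illustrative sketch (hypothetical vregs): if the target declines to fold
+// and MI is "%v0<def> = COPY %v1" with Ops = { 0 }, the COPY becomes a spill,
+// roughly storeRegToStackSlot(MBB, MI, %v1, isKill, FI, RC, TRI); with
+// Ops = { 1 } it becomes a reload of %v0 via loadRegFromStackSlot.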
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ NewMI = MBB.insert(MI, NewMI);
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ return false;
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MI->isNotDuplicable() || MI->mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.isConstantPhysReg(Reg, MF))
+ return false;
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+  // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
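+
+// Illustrative examples (x86-flavored, for exposition only): an immediate
+// load such as "%v0 = MOV32ri 42" has no register uses or side effects and
+// passes these checks, while "%v0 = ADD32rr %v1, %v2" is rejected by the
+// no-virtual-register-uses rule.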
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI->isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
+ return true;
+
+ return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+ const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
+ if (DefMI->mayLoad())
+ return SchedModel->LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return SchedModel->HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfo::
+getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI->mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfo::
+getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return getInstrLatency(ItinData, DefMI);
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->SchedModel->MinLatency >= 0)
+ return getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+    // For empty itineraries, short-circuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+  } else if (ItinData->isEmpty())
+    return defaultDefLatency(ItinData->SchedModel, DefMI);
+
+ // ...operand lookup required
+ return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = 0;
+ if (UseMI)
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
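+
+// Illustrative sketch (hypothetical itinerary values): if the def operand
+// becomes available at cycle 3 and the use reads at cycle 1, the per-operand
+// latency works out to 3 - 1 + 1 = 3 cycles before any forwarding
+// adjustment; when no operand cycle is recorded, the result falls back to
+// the instruction latency as computed above.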
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 0000000..f42bdbd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1305 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+
+ if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ Names[RTLIB::SINCOS_F32] = "sincosf";
+ Names[RTLIB::SINCOS_F64] = "sincos";
+ Names[RTLIB::SINCOS_F80] = "sincosl";
+ Names[RTLIB::SINCOS_F128] = "sincosl";
+ Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ } else {
+ // These are generally not available.
+ Names[RTLIB::SINCOS_F32] = 0;
+ Names[RTLIB::SINCOS_F64] = 0;
+ Names[RTLIB::SINCOS_F80] = 0;
+ Names[RTLIB::SINCOS_F128] = 0;
+ Names[RTLIB::SINCOS_PPCF128] = 0;
+ }
+}
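+
+// Editorial note on naming (libgcc convention): the mode suffix encodes the
+// width -- qi/hi/si/di/ti are 8/16/32/64/128-bit integers, sf/df/xf/tf are
+// 32/64/80/128-bit floats -- and the trailing digit counts the operands of
+// the corresponding GCC pattern, so "__ashlsi3" is the three-operand 32-bit
+// shift left.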
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
+}
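+
+// Illustrative sketch: a soft-float "a < b" on f32 lowers to a call to
+// __ltsf2(a, b), and the libcall's integer result is then tested against
+// zero with the condition code registered here, SETLT in this case.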
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f128, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+ setOperationAction(ISD::FEXP , MVT::f128, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+  // On most systems, DEBUGTRAP and TRAP are no different. The "Expand" here
+  // tells the DAG legalizer to replace DEBUGTRAP with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+  memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
+}
+
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(LHSTy);
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
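+
+// Illustrative use of the hook above (a sketch only; TLI, N, and Hoist are
+// hypothetical locals, not part of this file):
+//   if (!TLI->canOpTrap(N->getOpcode(), N->getValueType(0)))
+//     Hoist(N); // cannot trap, so it is safe to execute speculatively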
+
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
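+
+// Worked example: for MVT::v8f32 on a hypothetical target whose widest legal
+// vector type is v4f32, the halving loop above stops at NumElts = 4, giving
+// NumIntermediates = 2, IntermediateVT = RegisterVT = v4f32, and a return
+// value of 2 registers.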
+
+/// isLegalRC - Return true if at least one of the value types that can be
+/// represented by the specified register class is legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
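+
+// For example, on a hypothetical x86_64-like target where GR8, GR16, GR32,
+// and GR64 are all legal and GR8 is the class recorded for i8, the loop
+// above walks the super-register classes and settles on GR64, the legal
+// class with the largest spill size, returned with cost 1.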
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
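+
+  // Worked example: if i64 is the largest integer type with a register
+  // class, the loop above marks i128 as TypeExpandInteger with
+  // NumRegistersForVT[i128] = 2 * NumRegistersForVT[i64] and
+  // RegisterTypeForVT[i128] = i64.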
+
+  // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+  // Decide how to handle f128. If the target does not have native f128
+  // support, expand it to i128 and generate soft-float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+  // Decide how to handle f64. If the target does not have native f64
+  // support, expand it to i64 and generate soft-float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+ bool IsLegalWiderType = false;
+ // First try to promote the elements of integer vectors. If no legal
+      // promotion was found, fall back to the widen-vector method.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest legal register class
+  // (meaning one which is not a sub-register class of any other class) for
+  // a group of value types. For example, on i386 the representative class
+  // for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
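+
+// Worked example: for <3 x float> on a hypothetical target with legal f32
+// but no legal vector types, the non-power-of-2 path above sets
+// NumVectorRegs = 3 and NumElts = 1, so IntermediateVT = RegisterVT = f32
+// and the function reports 3 registers.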
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ }
+}
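+
+// For example, an i8 return value carrying the 'signext' attribute is
+// widened above to MinVT (i32 on a typical 32-bit target), producing a
+// single ISD::OutputArg of i32 with the SExt flag set. (Illustrative trace
+// for a hypothetical target.)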
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+  case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
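+
+// Worked example: for i128 on a hypothetical target whose widest legal
+// integer type is i64, getTypeConversion yields one TypeExpandInteger step
+// down to i64, so the loop doubles Cost once and returns {2, MVT::i64}.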
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+  // The default implementation supports only a conservative RISC-style
+  // r+r and r+i addressing mode.
+
+  // Allow a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+  // Only support scales of 0, 1, or 2.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+    break;
+  default: // Any other scale would be n*r, which is not supported here.
+    return false;
+  }
+
+ return true;
+}
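+
+// Illustrative queries against this default: BaseOffs = 4 with Scale = 0 is
+// accepted ("r+i"), while Scale = 2 with HasBaseReg set is rejected
+// ("2*r+r"). (Hypothetical AddrMode values, traced from the checks above.)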
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 0000000..3bdca4c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,784 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ return Mang->getSymbol(GV);
+ case dwarf::DW_EH_PE_pcrel: {
+ return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+ Mang->getSymbol(GV)->getName());
+ }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ unsigned Size = TM.getDataLayout()->getPointerSize();
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(Size, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ Streamer.EmitSymbolValue(Sym, Size);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+  // N.B.: The defaults used in here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+  // A simple default implementation based on well-known magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
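+
+// For example, an explicit section named ".tbss.mydata" is re-classified as
+// ThreadBSS by the prefix checks above, while a name like ".mysection"
+// matches nothing and keeps the kind K supplied by the caller.
+// (Illustrative section names.)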
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
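+
+// For example, a mergeable 1-byte C string section collects
+// SHF_ALLOC | SHF_MERGE | SHF_STRINGS from the rules above, while plain
+// writable data receives SHF_ALLOC | SHF_WRITE. (Worked directly from the
+// flag logic above.)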
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionsSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+ if (Kind.isBSS()) return ".bss.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon()) {
+ const char *Prefix;
+ Prefix = getSectionPrefixForGlobal(Kind);
+
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ Flags, Kind, 0, Group);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ if (UseInitArray) {
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ if (UseInitArray) {
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_;
+ if (!UseInitArray)
+ return;
+
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+/// emitModuleFlags - Perform code emission for module flags.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = 0;
+ StringRef SectionVal;
+
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version") {
+ VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated") {
+ ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Image Info Section") {
+ SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ SmallVector<std::string, 4> StrOptions;
+
+ // Convert to strings.
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StrOptions.push_back(MDOption->getString());
+ }
+
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind::getDataNoRel());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(ImageInfoFlags, 4);
+ Streamer.AddBlankLine();
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+  // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+  // Put zero-initialized globals with strong external linkage in the
+  // __DATA,__common section, emitted with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+  // Put zero-initialized globals with local linkage in the __DATA,__bss
+  // section, also emitted with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
+ MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+ else if (K.isBSS ())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+ if (Kind.isBSS ())
+ return ".bss$";
+ if (Kind.isThreadLocal()) {
+    // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
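+
+// For example, a weak text symbol "_foo" ends up in a section named
+// ".text$foo": the prefix comes from the function above, and the caller
+// below appends the mangled name minus its leading character.
+// (Illustrative symbol name.)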
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isThreadLocal())
+ return getTLSDataSection();
+
+ return getDataSection();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 0000000..0f59d01
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (the default), the
+/// code generator is not allowed to generate mad (multiply add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 0000000..84b4bfc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,285 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
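+
+// Worked example of the mask walk above: if a 32-bit word of the sub-class
+// mask has bits 1 and 3 set (0b1010), the classes with IDs Base+1 and
+// Base+3 are tested for allocatability, in that order. (Illustrative bit
+// pattern.)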
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the most specific (smallest)
+/// register class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+  // Pick the most specific (smallest) register class of the right type that
+  // contains this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+  // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 0000000..783bfa1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,309 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+ // Dividend and Divisor will be naturally swapped as needed.
+ while (Divisor) {
+ unsigned Rem = Dividend % Divisor;
+ Dividend = Divisor;
+ Divisor = Rem;
+ }
+ return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+ unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+ assert((LCM >= A && LCM >= B) && "LCM overflow");
+ return LCM;
+}
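+
+// For illustration: gcd(6, 4) iterates (6,4) -> (4,2) -> (2,0) and
+// returns 2, so lcm(6, 4) = (6 * 4) / 2 = 12. The intermediate product
+// is widened to 64 bits so the division happens before any truncation;
+// a result too large for 32 bits is what the assert above is intended
+// to catch.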
+
+void TargetSchedModel::init(const MCSchedModel &sm,
+ const TargetSubtargetInfo *sti,
+ const TargetInstrInfo *tii) {
+ SchedModel = sm;
+ STI = sti;
+ TII = tii;
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
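+
+// For illustration: with IssueWidth == 2 and two resource kinds with
+// NumUnits == 1 and NumUnits == 3, ResourceLCM = lcm(lcm(2, 1), 3) = 6,
+// MicroOpFactor = 6 / 2 = 3, and the resource factors are 6 and 2.
+// Scaling cycle counts by these factors puts all resources on a common
+// integer scale, so their utilization can be compared directly.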
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// If we can determine the operand latency from the def only, without machine
+/// model or itinerary lookup, do so. Otherwise return -1.
+int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
+ bool FindMin) const {
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is invalid, then use the itinerary for MinLatency. If no
+ // itinerary exists either, then use single cycle latency.
+ if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
+ return 1;
+ }
+ return SchedModel.MinLatency;
+ }
+ else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
+ return TII->defaultDefLatency(&SchedModel, DefMI);
+ }
+ // ...operand lookup required
+ return -1;
+}
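+
+// For illustration: with MinLatency == -1 and no itineraries, every def
+// gets unit min-latency; with MinLatency == -1 and itineraries present,
+// -1 is returned so the caller falls back to a per-operand itinerary
+// lookup; a MinLatency >= 0 (e.g. 0 on an out-of-order cpu) is returned
+// directly.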
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
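+
+// For illustration: for an instruction with operands (def %a, def %b,
+// use %c), DefOperIdx == 1 maps to DefIdx == 1. Only def operands
+// preceding DefOperIdx are counted, so interleaving uses among the defs
+// leaves the result unchanged.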
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
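+
+// For illustration: for (def %a, use %b, use %c), UseOperIdx == 2 maps
+// to UseIdx == 1; the def at operand 0 is not counted because a plain
+// def does not read its register.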
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx,
+ bool FindMin) const {
+
+ int DefLatency = getDefLatency(DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency =
+ TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ TII->defaultDefLatency(&SchedModel, DefMI));
+ return InstrLatency;
+ }
+ assert(!FindMin && hasInstrSchedModel() &&
+ "Expected a SchedModel for this cpu");
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+ std::string Err;
+ raw_string_ostream ss(Err);
+ ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ report_fatal_error(ss.str());
+ }
+#endif
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
+}
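+
+// For illustration: if the def's write latency entry specifies 4 cycles
+// and the use's ReadAdvance entry for that WriteResourceID is 2, the
+// computed operand latency is 4 - 2 = 2 cycles, modeling a forwarding
+// path that lets the consumer read the value early.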
+
+unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle())
+ return TII->getInstrLatency(&InstrItins, MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+ }
+ return TII->defaultDefLatency(&SchedModel, MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ // MinLatency == -1 is for in-order processors that always have unit
+ // MinLatency. MinLatency > 0 is for in-order processors with varying min
+ // latencies, but since this is not a RAW dep, we always use unit latency.
+ if (SchedModel.MinLatency != 0)
+ return 1;
+
+ // MinLatency == 0 indicates an out-of-order processor that can dispatch
+ // WAW dependencies in the same cycle.
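+ //
+ // For illustration: an in-order cpu (MinLatency != 0) always reports a
+ // WAW latency of 1 cycle here, while an out-of-order cpu (MinLatency
+ // == 0) reports 0, unless the def writes an unbuffered resource or the
+ // predication special case below applies.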
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+ return computeInstrLatency(DefMI);
+
+ // If we have a per operand scheduling model, check if this def is writing
+ // an unbuffered resource. If so, it is treated like an in-order cpu.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..e6dfe10
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1678 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to
+// be capable of handling the non-SSA nature of these rewritten
+// virtual registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+
+namespace {
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB;
+
+ // DistanceMap - Keep track of the distance of an MI from the start of
+ // the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
+
+ // SrcRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies from physical registers to
+ // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies to physical registers from
+ // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
+
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist);
+
+ bool commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
+
+ bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
+
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute);
+
+ void scanUses(unsigned DstReg);
+
+ void processCopy(MachineInstr *MI);
+
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+
+/// sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to
+/// reduce register pressure.
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // FIXME: Shouldn't we be trying to do this before we three-addressify the
+ // instruction? After this transformation is done, we no longer need
+ // the instruction to be in three-address form.
+
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(SavedReg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ }
+ if (!KillMI) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+ }
+
+ // If we find the instruction that kills SavedReg, and it is in an
+ // appropriate location, we can try to sink the current instruction
+ // past it.
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
+ KillMI == OldPos || KillMI->isTerminator())
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+ assert(KillMO && "Didn't find kill");
+
+ if (!LIS) {
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ }
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ if (LIS)
+ LIS->handleMove(MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// noUseAfterLastDef - Return true if there are no intervening uses between
+/// the last instruction in the MBB that defines the specified register and
+/// the two-address instruction which is being processed. It also returns the
+/// last def location by reference.
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
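+
+// For illustration: with the two-address instruction at distance 10, a
+// last def of Reg at distance 4 and a use at distance 7, LastUse (7) is
+// greater than LastDef (4) and less than Dist (10), so there is an
+// intervening use and noUseAfterLastDef returns false.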
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an insert_subreg / subreg_to_reg instruction. It also returns the source
+/// and destination registers and whether they are physical registers by
+/// reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
+
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+}
+
+/// isPlainlyKilled - Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+/// If allowFalsePositives is true then likely kills are treated as kills even
+/// if it can't be proven that they are kills.
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (llvm::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single use within
+/// the basic block, return the use instruction if it's a copy or a
+/// two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
+ return 0;
+ MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
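+
+// For illustration: with SrcRegMap entries v1026 -> v1024 and
+// v1024 -> r0, getMappedReg(v1026, SrcRegMap) follows the chain to the
+// physical register r0; if the chain ends at an unmapped virtual
+// register instead, it returns 0.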
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
+/// the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!isPlainlyKilled(MI, regC, LIS))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ if (ToRegA) {
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
+ if (BComp != CComp)
+ return !BComp && CComp;
+ }
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!noUseAfterLastDef(regC, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!noUseAfterLastDef(regB, Dist, LastDefB))
+ return true;
+
+ // Since there are no intervening uses for both registers, then commute
+ // if the def of regC is closer. Its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// commuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
+ // FIXME: Why does convertToThreeAddress() need an iterator reference?
+ MachineFunction::iterator MFI = MBB;
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+ assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ if (!NewMI)
+ return false;
+
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(mi, NewMI);
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // use RegB, convertToThreeAddress must have created more
+ // than one instruction.
+ Sunk = sink3AddrInstruction(NewMI, RegB, mi);
+
+ MBB->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
+ }
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+}
+
+/// scanUses - Scan forward recursively for uses only, updating the maps if
+/// a use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+ // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// processCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ scanUses(DstReg);
+ }
+
+ Processed.insert(MI);
+ return;
+}
+
+/// rescheduleMIBelowKill - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
+ }
+
+ // Check if the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's ok if the instruction
+ // is sunk completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ bool isKill = MO.isKill() ||
+ (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
+ if (MOReg != Reg &&
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (MOReg == Reg && !isKill)
+ // We can't schedule across a use of the register in question.
+ return false;
+ // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ MachineInstr *CopyMI = MBBI;
+ ++MBBI;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = llvm::next(MachineBasicBlock::iterator(MI));
+ }
+
+ // Copies following MI may have been moved as well.
+ MBB->splice(InsertPos, MBB, Begin, End);
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
+ return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DefMI = &*DI;
+ if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+ continue;
+ if (DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// rescheduleKillAboveMI - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI))
+ return false;
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
+ return false;
+ Uses.insert(MOReg);
+ if (isKill && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+ // Check if the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
+ // We can't schedule across a use of the register in question.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+ // Move the old kill above MI, don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (llvm::prior(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
+ return true;
+}
+
+/// tryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled, and will be visited again later). If the
+/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
+bool TwoAddressInstructionPass::
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ scanUses(regA);
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = MI.getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (shouldOnlyCommute)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+ // re-schedule this MI below it.
+ if (rescheduleMIBelowKill(mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
+ if (MI.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+ // If there is one more use of regB later in the same MBB, consider
+ // re-schedule it before this MI if it's legal.
+ if (rescheduleKillAboveMI(mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+ // because it's preferable to schedule a load rather than a register copy.
+ if (MI.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg())
+ OrigRegs.push_back(MOI->getReg());
+ }
+ }
+
+ MI.eraseFromParent();
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
+ mi = NewMIs[1];
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
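+//
+// For example (an illustrative sketch; operand syntax simplified), an X86
+// two-address add such as:
+//
+//   %reg3<def> = ADD32rr %reg1<tied0>, %reg2
+//
+// yields one (SrcIdx, DstIdx) pair tying use operand 1 to def operand 0,
+// recorded in TiedOperands under the source register %reg1.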
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
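+//
+// For example (an illustrative sketch; operand syntax simplified), given the
+// tied pair (1, 0) on:
+//
+//   %reg3<def> = ADD32rr %reg1<kill,tied0>, %reg2
+//
+// a COPY is prepended and the tied use is rewritten to the destination, so
+// the constraint is satisfied and the kill flag moves to the copy:
+//
+//   %reg3<def> = COPY %reg1<kill>
+//   %reg3<def> = ADD32rr %reg3, %reg2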
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+ }
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
+ LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+ }
+ }
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ // Update LiveIntervals.
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeRange(LastCopyIdx, UseIdx);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ InstrItins = TM.getInstrItineraryData();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ OptLevel = TM.getOptLevel();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF->getName() << '\n');
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ TiedOperandMap TiedOperands;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MBB = MBBI;
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
+ if (mi->isRegSequence())
+ eliminateRegSequence(mi);
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ processCopy(&*mi);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
+ }
+
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
+ // The tied operands have been eliminated or shifted further down the
+ // block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
+ continue;
+ }
+ }
+ }
+
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ processTiedPairs(mi, OI->second, Dist);
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
+
+ return MadeChange;
+}
+
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// %dst:ssub0<def,undef> = COPY %v1
+/// %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI->getOperand(0).getReg());
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI->getOperand(i).getReg());
+ }
+
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI->getOperand(i);
+ unsigned SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ // Nothing needs to be inserted for <undef> operands.
+ if (UseMO.isUndef())
+ continue;
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
+
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
+
+ // The first def needs an <undef> flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
+ }
+ DefEmitted = true;
+
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+
+ MachineBasicBlock::iterator EndMBBI =
+ llvm::next(MachineBasicBlock::iterator(MI));
+
+ if (!DefEmitted) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
+
+ // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..a95ebcd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,215 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
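+//
+// For example (an illustrative IR sketch), the block %dead below is never
+// reached by the depth-first traversal from %entry, so this pass deletes it
+// after dropping its references and pruning predecessor lists:
+//
+//   entry:
+//     br label %exit
+//   dead:                                ; no predecessors - deleted
+//     br label %exit
+//   exit:
+//     ret void
+//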
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+ class UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+ };
+}
+char UnreachableBlockElim::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
+ }
+
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+ ModifiedPHI = true;
+
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (DeadBlocks.size() || ModifiedPHI);
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..cd012d2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,359 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
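+//
+// For example (an illustrative sketch), after register allocation the map
+// may record assignments such as:
+//
+//   %vreg0 -> %EAX     (mapped to a physical register)
+//   %vreg1 -> fi#2     (spilled; mapped to a stack slot frame index)
+//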
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2SplitMap.clear();
+
+ grow();
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ ++NumSpillSlots;
+ return SS;
+}
+
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = getPhys(Hint);
+ return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ return hasPhys(Hint.second);
+ return false;
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
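+// For example (an illustrative sketch), with %vreg0 assigned %EAX and %vreg1
+// assigned %ECX in the VirtRegMap:
+//
+//   %vreg0<def> = ADD32rr %vreg0, %vreg1<kill>
+//
+// is rewritten to:
+//
+//   %EAX<def> = ADD32rr %EAX, %ECX<kill>
+//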
+namespace {
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ void rewrite();
+ void addMBBLiveIns();
+public:
+ static char ID;
+ VirtRegRewriter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+char VirtRegRewriter::ID = 0;
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getName() << '\n');
+ DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags(VRM);
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ return true;
+}
+
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
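+//
+// For example (an illustrative sketch), if %vreg0 is assigned %EAX and its
+// live range spans from bb.0 into bb.2, then %EAX is added to bb.2's
+// live-in list.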
+void VirtRegRewriter::addMBBLiveIns() {
+ SmallVector<MachineBasicBlock*, 16> LiveIn;
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+
+ // Scan the segments of LI.
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
+ ++I) {
+ if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
+ }
+}
+
+void VirtRegRewriter::rewrite() {
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "Instruction uses unmapped VirtReg");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
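+ // For example (an illustrative sketch), with %vreg0 assigned %X0, a
+ // killing sub-register use %vreg0:sub_32<kill> is rewritten to %W0<kill>,
+ // and an %X0<imp-use,kill> operand is added after the rewrite.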
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ MO.setIsUndef(false);
+
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ ++NumIdCopies;
+ if (MI->getNumOperands() == 2) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ } else {
+ // Transform identity copy to a KILL to deal with subregisters.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "Identity copy: " << *MI);
+ }
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}